In [1]:
import numpy as np
import typing

In [2]:
class Agent():
    """
    Represents one player having one symbol
    """
    def __init__(self, symbol:np.character=b'x', epsilon:float=0.2, alpha:float=0.5):
        self.symbol = symbol
        self.epsilon=0.2
        self.verbose = False
        self.V = None
        self.state_history = []
        self.alpha = alpha
        
    def set_V_table(self, V:np.ndarray):
        self.V = V
        
    def take_action(self, env:'Environment', agents:typing.Tuple['Agent']):
        if np.random.rand() < self.epsilon:
            if self.verbose:
                print("Agent will take a random action")
                
            spot = env.get_free_spot_random()
            next_move = spot
            
        else:
            best_value = -float('inf')
            for i in range(0, env.n):
                for j in range(0, env.n):
                    if(env.is_spot_free(i=i, j=j)):
                        empty_field = env.board[i, j]
                        env.board[i, j] = self.symbol
                        state = env.to_state(agents=agents)
                        if self.V[state] > best_value:
                            best_value = self.V[state]
                            best_state = state
                            next_move = (i, j)
                            
                        env.board[i, j] = empty_field
        i, j = next_move
        env.set_field(i=i, j=j, agent=self)
            
    def update_state_history(self, s):
        self.state_history.append(s)
        
    def set_verbose(self, verbose:bool=True):
        self.verbose = True
    
    def reset_history(self):
        self.state_history = []
        
    def update(self, env:'Environment'):
        """At the end of an episode recalculate V'S"""
        reward = env.reward(self)
        target = reward
        if self.verbose:
            print("{} got reward {}".format(self.symbol, reward))
            
        for prev in reversed(self.state_history):
            value = self.V[prev] + self.alpha * (target - self.V[prev])
            self.V[prev] = value
            target = value
            
        # clear old history, since game ended
        self.reset_history()

In [3]:
class Human:
    def __init__(self, symbol:np.character):
        self.symbol = symbol
        pass
    
    def take_action(self, env:'Environment', agents:typing.Tuple[Agent]):
        while True:
            env.draw_board()
            move = input("Enter i,j in range 0 to {}: ".format(env.n - 1))
            i, j = move.split(',')
            i = int(i)
            j = int(j)
            if env.is_spot_free(i=i, j=j):
                env.board[i, j] = self.symbol
                break
                
    def update_state_history(self, s):
        pass
    
    def update(self, env:'Environment'):
        pass

In [4]:
class Environment():
    """
    Represents the field
    """
    def __init__(self, n:int=3):
        self.n = n
        self.empty_field()
        
    def to_state(self, agents:typing.List[Agent]) -> int:
        symbols = [b'', *[agent.symbol for agent in agents]]
        
        i = 0
        h = 0
        
        for val in np.nditer(self.board):
            h += len(symbols)**i * symbols.index(val)
            i += 1
        
        return h
    
    
    def empty_field(self):
        self.board = np.zeros(shape=(self.n, self.n), dtype=np.character)

    def set_field(self, i:int, j:int, agent:Agent):
        self.board[i, j] = agent.symbol
        
    def get_free_spot_random(self) -> typing.Tuple[int, int]:
        it = np.nditer(self.board, flags=['multi_index'])
        spots = []
        while not it.finished:
            val = it[0]
            i, j = it.multi_index
            if val == b'': 
                spots.append((i, j))

            it.iternext()
        
        if len(spots) == 0:
            raise IndexError("No empty spot left (Code: 239480293)")
            
        return spots[np.random.choice(len(spots))]
    
    def is_spot_free(self, i:int, j:int) -> bool:
        return self.board[i, j] == b''
    
    def is_draw(self) -> bool:
        has_empty_field = False
        for val in np.nditer(self.board):
            if val == b'':
                has_empty_field = True
                
        return not has_empty_field
    
    def has_won(self, agent:Agent):
        has_won = False
        
        # vertical + horizontal
        for board in [self.board, self.board.T]:
            for line in board:
                cnt = 0
                for symbol in line:
                    if symbol != agent.symbol:
                        break
                    cnt += 1
                    
                has_won = has_won or (cnt == self.n)
        
        # check diagonals
        diag_1_cnt = 0
        diag_2_cnt = 0
            
        for i in range(0, self.n):
            if self.board[i][i] == agent.symbol:
                diag_1_cnt += 1
                
            if self.board[self.n - 1 - i ][i] == agent.symbol:
                diag_2_cnt += 1
        
        has_won = has_won or diag_1_cnt == self.n or diag_2_cnt == self.n
        
        return has_won
    

    def get_board(self) -> np.ndarray:
        return self.board
    
    def reward(self, agent:Agent) -> int:
        if self.is_draw():
            return 0.
        
        if self.has_won(agent):
            return 1.
        
        return -1.

    def draw_board(self):
        print(self.board)

In [5]:
def get_state_hash_and_winner(env:Environment, agents:typing.List[Agent], i:int=0, j:int=0):
    """
    Generates all possible states
    """
    states = []
    symbols = [b'', *[agent.symbol for agent in agents]]
    for symbol in symbols:
        env.board[i, j] = symbol
        if i == env.n - 1 and j == env.n - 1:
            state = env.to_state(agents=agents)
            winner = symbols[0]
            
            for player in agents:
                if env.has_won(player):
                    winner = player.symbol
                    
            states.append((state, winner, env.is_draw() or winner != symbols[0]))
            
        elif j == env.n - 1:
            states += get_state_hash_and_winner(env=env, agents=agents, i = i + 1, j = 0)
        else:
            states += get_state_hash_and_winner(env=env, agents=agents, i = i, j = j + 1)
            
    return states

In [6]:
def init_Vs(env:Environment, agents: typing.List[Agent], game_state_tuples:typing.List[typing.Tuple]):
    vs = [np.zeros((len(agents) + 1) ** (env.n ** 2), dtype=np.float16) for agent in agents]

    for state_tup in game_state_tuples:
        for agent_num, agent in enumerate(agents):
            state, winner, ended = state_tup
            if ended:
                if winner == agent.symbol:
                    vs[agent_num][state] = 1.
            else:
                vs[agent_num][state] = 0.5
    return vs

In [7]:
def play_game(env:Environment, players: typing.List[Agent]) -> Environment:
    game_over = False
    env.empty_field()
    player_cnt = 0
    
    while not game_over:
        player = players[player_cnt]
        state = player.take_action(env, agents=players)
        
        for player in players:
            player.update_state_history(s=state)
            
        if env.is_draw():
            game_over = True
            
        for player in players:
            if env.has_won(player):
                game_over = True
                
        player_cnt = (player_cnt + 1) % len(players) 

    env.draw_board()
    
    for player in players:
        player.update(env=env)
        
    return env

In [8]:
#### Play code

In [9]:
env = Environment()
env.empty_field()
players = [Agent(symbol=b'x'), Agent(symbol=b'o')]

for player in players:
    player.set_verbose()
    
tuples = get_state_hash_and_winner(env=env, agents=players)
Vs = init_Vs(env=env, agents=players, game_state_tuples=tuples)

for i, agent in enumerate(players):
    agent.set_V_table(Vs[i])
    
T = 20000
for t in range(T):
    if t % 200 == 0:
        print(t)
    play_game(env=Environment(), players=players)

0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]


Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' 

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' g

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got r

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'' b'o']
 [b'' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'' b'x' b'']]
b'

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'']
 [b'o' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'']
 [b'o' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'']
 [b'' b'o' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a r

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b

[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'' b'o' b'x']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a 

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]

Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'' b'x']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'' b'']
 [b'x' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x

b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'' b'o']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got 

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random ac

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got 

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got re

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'' b'o' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'' b'o' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random a

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'' b'o']
 [b'' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a rando

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a 

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take

 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'' b'' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent 

b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random act

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'']
 [b'' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'']
 [b'o' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got r

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'' b'x' b'']
 [b'x' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']


b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'' b'x']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' go

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'x' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a ra

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
3000
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'' b'' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' g

Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']


b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' g

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'' b'o' b'x']
 [b'' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random acti

[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random actio

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'' b'x']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'o']
 [b'o' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a rand

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o'

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x'

b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'o']]
b'x' go

b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will 

b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x'

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random acti

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]

b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' 

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
A

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
A

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random actio

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'

Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'x' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'o' b'' 

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'' b'']
 [b'x' b'o' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'']
 [b'' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
5200
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' g

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'' b'o']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'' b'' b'x']]
b'x' got

b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random actio

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
5600
Agent will take a random action
Agent will take a random action
[[b'x' b'o

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'' b'o' b'']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got 

b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'o' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'' b'x']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random actio

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random a

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'' b'' b'o']
 [b'x' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random 

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' g

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'x' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'' b'x' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]


b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x

b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got re

[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'']
 [b'' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' 

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'' b'']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o'

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b

 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got re

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b''

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a ra

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'']
 [b'x' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random actio

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got rewa

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'' b'o' b'']
 [b'' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a 

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'']
 [b'o' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a ra

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take 

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' 

Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a ran

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got rewa

b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o

Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random 

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o'

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'o']]
b'x' got r

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a ra

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'o']
 [b'' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got rew

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random act

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' go

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'' b'']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'x' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
8600
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'o' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o

Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b''

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
8800
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'' b'o' b'']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a rando

Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a ran

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'' b'x' b'o']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' go

b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b''

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a 

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'o']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random actio

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random act

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got re

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' g

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'x' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'o']
 [b'' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a rand

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'' 

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a ran

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' 

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'']
 [b'' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a rando

 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'']
 [b'x' b'' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got r

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a ran

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got r

b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'']
 [b'' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will tak

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'']
 [b'o' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random act

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'' b'']
 [b'x' b'o' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a

Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got

Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got 

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'' b'x' b'']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'' b'o' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'' b'o' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a 

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' go

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random actio

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x'

Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got rewa

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'o']
 [b'' b'' b'']]
b'x' got reward 1.0
b'o' got 

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'']
 [b'o' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got r

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'o']
 [b'x' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action


Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'o']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]

b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random actio

Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'o']
 [b'' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got r

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
12600
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will 

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'']
 [b'' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'' b'']
 [b'x' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [

b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' 

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
12800
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a ran

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o'

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got

Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'o' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a 

Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'o' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take 

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'o' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a r

b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got rewa

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'' b'x' b'o']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a r

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
14000
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a ran

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a ra

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random actio

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent

Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a r

b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'o' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random acti

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a

b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' 

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got 

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'']
 [b'' b'o' b'']
 [b'x' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got rewar

b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'']
 [b'o' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']


b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'o' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o'

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got

Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a ran

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o'

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
15600
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x'

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take 

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'' b'x' b'']
 [b'o' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' b

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]


Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'o' b'']
 [b'' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward

Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' go

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x'

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'x' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a ran

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got rewar

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
16600
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent wil

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'']
 [b'' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a ra

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'o' b'' b'']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'' b'o' b'']
 [b'x' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got rew

Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'' b'o' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'o' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'o' b'o']
 [b'' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'o' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' 

b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Ag

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0


b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random actio

b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agen

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'o']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a ra

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']


 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'o' b'x' b'']
 [b'' b'x' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got rew

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']

Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a ra

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'o']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]


Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a 

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'']
 [b'o' b'o' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got

Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']


b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'x' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x

b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0


Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'x' b'o']
 [b'x' b'o' b'o']]
b'x' got reward 0.0
b'o'

Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o

b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Ag

b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'

b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'' b'' b'o']
 [b'x' b'' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'o' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got r

b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
A

Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'']
 [b'x' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'']
 [b'x' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 

[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'x' b'o']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got 

Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'']]
b'x' got reward -1.0
b'o' 

Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a rand

b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'o']
 [b'' b'' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'o' b'']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got rewa

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'x']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'x' b'o']
 [b'' b'x' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'']
 [b'' b'o' b'x']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got

b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'o' b'o' b'x']
 [b'x' b'o' b'x']
 [b'x' b'o' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b

Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'x' b'x']
 [b'' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'o']
 [b'' b'x' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']

Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'o' b'']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'x' b'']
 [b'' b'x' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [

b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'o' b'x' b'x']
 [b'o' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'o']
 [b'o' b'o' b'x']
 [b'x' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'x']
 [b'' b'x' b'o']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'o' b'x']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random 

b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'' b'x']]
b'x' got reward -1.0
b'o' got reward 1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Ag

Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'x']
 [b'o' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'o' b'x']
 [b'o' b'x' b'']]
b'x' got reward -1.0
b'o' got reward 1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'o' b'x' b'o']
 [b'x' b'o' b'o']
 [b'x' b'x' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'x' b'o']]
b'x' got reward 0.0
b'o' got reward 0.0
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'o' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take 

Agent will take a random action
[[b'o' b'x' b'x']
 [b'x' b'o' b'x']
 [b'o' b'o' b'x']]
b'x' got reward 0.0
b'o' got reward 0.0
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'x' b'x']
 [b'' b'' b'o']
 [b'' b'o' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
[[b'x' b'o' b'o']
 [b'x' b'' b'']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'o' b'x' b'o']
 [b'x' b'' b'']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a random action
[[b'x' b'o' b'x']
 [b'x' b'' b'o']
 [b'x' b'' b'o']]
b'x' got reward 1.0
b'o' got reward -1.0
Agent will take a random action
Agent will take a random action
Agent will take a rando

In [11]:
own_player = Human(symbol=b'o')
play_game(players=[players[0], own_player], env=Environment())

[[b'x' b'' b'']
 [b'' b'' b'']
 [b'' b'' b'']]
Enter i,j in range 0 to 2: 1,1
[[b'x' b'x' b'']
 [b'' b'o' b'']
 [b'' b'' b'']]
Enter i,j in range 0 to 2: 0,2
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'' b'' b'']]
Enter i,j in range 0 to 2: 2,0
[[b'x' b'x' b'o']
 [b'x' b'o' b'']
 [b'o' b'' b'']]
b'x' got reward -1.0


<__main__.Environment at 0x1d1ae3265f8>

False