# 1. import libraries

In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy

# 2. define functions

In [13]:
###################################################################
def win_check(arr, char):
    '''
    Tell whether player `char` owns a complete line on the board.

    reference: https://www.geeksforgeeks.org/validity-of-a-given-tic-tac-toe-board-configuration/

    arr  : the board as a flat sequence of 9 cells, indexed row by row
           (0-2 top row, 3-5 middle row, 6-8 bottom row)
    char : the mark to look for (X => 1, O => 2; X starts the game)

    input: win_check(arr,1)
    output: True or False
    '''
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
        (0, 4, 8), (2, 4, 6),              # diagonals
    )
    # A player wins as soon as one line is filled entirely with their mark.
    return any(
        all(arr[cell] == char for cell in line)
        for line in winning_lines
    )
###################################################################
def is_valid(arr):
    '''
    Tell whether a board could occur in a real game in which X (1) moves first.

    reference: https://www.geeksforgeeks.org/validity-of-a-given-tic-tac-toe-board-configuration/

    arr : the board as a list of 9 cells (0 = empty, 1 = X, 2 = O)

    input: is_valid(arr)
    output: True or False
    '''
    x_marks = arr.count(1)
    o_marks = arr.count(2)

    # Players alternate and X starts, so X has either as many marks as O
    # or exactly one more. Anything else can never be reached.
    if x_marks - o_marks not in (0, 1):
        return False

    o_wins = win_check(arr, 2)
    x_wins = win_check(arr, 1)

    if o_wins:
        # Both players cannot win, and O can only complete a line on its own
        # move, i.e. when both players have placed the same number of marks.
        return (not x_wins) and x_marks == o_marks

    if x_wins:
        # X completes a line on its own move, so it must be one mark ahead.
        return x_marks == o_marks + 1

    # Nobody has won and the mark counts are consistent.
    return True
###################################################################
def EGAS(QS,ep):
    '''
        Epsilon greedy action selection.

        QS : the values of the actions that are available in the current state
        ep : probability of picking a non-greedy action (when at least one exists)

        An action is "greedy" when its value equals the maximum of QS; ties are
        all treated as greedy. One random draw decides between exploring and
        exploiting, a second one picks uniformly inside the chosen group. When
        every action is greedy there is nothing to explore, so a greedy action
        is returned either way.

        The returned value is a position in QS, not a board cell.

        input: EGAS([0,1,-1],0.1)
        output: 0 or 1 or 2
    '''
    best_value = max(QS)
    greedy = [idx for idx, value in enumerate(QS) if value == best_value]
    non_greedy = [idx for idx in range(len(QS)) if idx not in greedy]

    # The exploration draw is always made first, so the sequence of random
    # numbers consumed does not depend on which group ends up being used.
    explore = np.random.rand() < ep
    candidates = non_greedy if (explore and non_greedy) else greedy

    return candidates[np.random.randint(len(candidates))]
###################################################################
def get_possible_actions(board):
    '''
        List the cells that can still be played, i.e. the positions of the
        board whose value is 0 (empty), in increasing order.

        input: get_possible_actions([0,1,0,2,0,0,0,0,0])
        output: [0,2,4,5,6,7,8]
    '''
    return [cell for cell, mark in enumerate(board) if mark == 0]
###################################################################
def get_current_state(board):
    '''
        Map a board to its state number: the position of that board inside the
        module-level list `boards` (the list of all valid boards).

        list.index raises ValueError when the board is not in `boards`.

        current_board = [0,0,0,0,0,0,0,0,0]
        input: get_current_state(current_board)
        output: 0
    '''
    state_number = boards.index(board)
    return state_number
###################################################################
def create_valid_boards():
    '''
        Enumerate every 3x3 board and keep the ones accepted by is_valid.

        reference: https://stackoverflow.com/questions/61508393/generate-all-possible-board-positions-of-tic-tac-toe

        Each of the 9 cells holds 0, 1 or 2, so a board is the base-3 expansion
        of a number in range(3**9) (= 19683 candidates). Cell 0 is the least
        significant digit. Boards are returned in increasing order of that number.

        input: -
        output: a list of valid boards
    '''
    n_cells = 9
    valid_boards = []

    for code in range(3 ** n_cells):
        # Digit number `pos` of `code` written in base 3 is the content of cell `pos`.
        candidate = [(code // 3 ** pos) % 3 for pos in range(n_cells)]
        if is_valid(candidate):
            valid_boards.append(candidate)

    return valid_boards
###################################################################
def tied(board,possible_actions):
    '''
        Report a tie when there is no empty cell left to play.

        Only `possible_actions` is inspected; `board` is accepted but not used,
        and no win check happens here. The caller is expected to have dealt
        with a win before asking.

        input: tied(board, [])
        output: True or False
    '''
    return True if possible_actions == [] else False
###################################################################
def Reward(board,winning_reward,punishment):
    '''
        Compute the reward of both players for the current board.

        board          : the board after the latest move
        winning_reward : reward handed to the player who owns a complete line
        punishment     : reward handed to the other player, and to both players
                         while nobody has won

        output: (r1, r2, end) where r1 / r2 are the rewards of player 1 / 2 and
                end is True only when one of the players has won. A full board
                without a winner still yields end = False.
    '''
    first_player_wins = win_check(board, 1)
    second_player_wins = win_check(board, 2)

    # Player 1 is checked first, exactly like the win test order of the game.
    if first_player_wins:
        return winning_reward, punishment, True
    if second_player_wins:
        return punishment, winning_reward, True
    return punishment, punishment, False

# 3. initialize the board

In [10]:
# Enumerate all valid boards once. The position of a board in this list is its
# state number (see get_current_state) and therefore its row in the Q-tables.
boards = create_valid_boards()

# 4. initialize the hyperparameters

In [14]:
# Q-tables: one row per valid board (state number), one column per board cell (action 0-8).
Q1 = np.zeros((len(boards),9)) # Q table for player 1                                                (required)
Q2 = np.zeros((len(boards),9)) # Q table for player 2                                                (required)

ep = 1                         # initial epsilon: probability of choosing a non-greedy action        (required)
ep_decay_rate = 0.9991         # ep is multiplied by this after every episode, floor 0.01            (not required: it can be 1)

alpha = 1                      # initial step size (learning rate)                                   (required)
alpha_decay_rate = 0.9991      # alpha is multiplied by this after every episode, floor 0.01         (not required: it can be 1)

gamma = 1                      # discount factor: the importance of future outcomes                  (required)

punishment = -1                # reward of every move that does not win the game, it can be 0        (required)
winning_reward = 2             # reward of the move that wins the game                               (required)
max_episode = 6000             # number of episodes (complete games) to play                         (required)

# 5. main loop

In [15]:
# Self-play training: players 1 and 2 alternate moves on one board, and each player
# updates its own Q-table (Q1 / Q2) right after every move it makes.
for episode in range(max_episode):
    # start episode
    # boards[0] is the empty board. Copy just that one board so the moves below never
    # modify the shared `boards` table (deep-copying the whole table every episode only
    # to take its first element was wasted work).
    board = deepcopy(boards[0])
    S = get_current_state(board)
    last_action = 2 # pretend player 2 moved last, since we want 1 to start the game
    end = False

    # steps
    counter = 1 # turn number inside the episode, only useful for debugging output
    while True:
        # The player who did not move last is to move now: use that player's table.
        if last_action == 1: # last action was 1, so 2 selects an action => Q2
            QS = Q2[S]       # action values at state S for player 2
        else:                # last action was 2, so 1 selects an action => Q1
            QS = Q1[S]

        possible_actions = get_possible_actions(board)
        # A win ends the episode further down, so an empty list here means the board
        # filled up without a winner.
        if tied(board,possible_actions):
            print(f'Episode: {episode} | tied')
            break

        # Values of the playable cells only; EGAS answers with a position in this array.
        QpS = QS[possible_actions]

        a = possible_actions[EGAS(QpS,ep)] # change the possible action index to actual action index (0-8)
        if last_action == 1:
            board[a], last_action = 2, 2
        else:
            board[a], last_action = 1, 1
        counter += 1

        Sp = get_current_state(board)

        r1,r2,end = Reward(board,winning_reward,punishment)

        # From here on last_action identifies the player who has just moved; only that
        # player's table is updated.
        # NOTE(review): Sp is a position from which the OPPONENT moves, while each table
        # is only ever written at positions its own player moves from. max(Q[Sp])
        # therefore appears to always read an all-zero row, so the gamma term never
        # propagates value. Confirm, and consider bootstrapping from the player's next
        # own-turn state (restricted to legal actions) instead.
        # NOTE(review): when a player wins, the loser's last move receives no update.
        if last_action == 2:
            Q2[S][a] = Q2[S][a] + alpha*(r2 + gamma * max(Q2[Sp]) - Q2[S][a])
        else:
            Q1[S][a] = Q1[S][a] + alpha*(r1 + gamma * max(Q1[Sp]) - Q1[S][a])

        if end:
            if r1 == winning_reward:
                print(f'Episode: {episode} | player 1 won!')
            if r2 == winning_reward:
                print(f'Episode: {episode} | player 2 won!')
            break
        else:
            S = Sp
    # Decay exploration and step size once per episode, never below 0.01.
    ep = max(ep*ep_decay_rate, 0.01)
    alpha = max(alpha*alpha_decay_rate, 0.01)

Episode: 0 | tied
Episode: 1 | tied
Episode: 2 | tied
Episode: 3 | tied
Episode: 4 | tied
Episode: 5 | tied
Episode: 6 | tied
Episode: 7 | tied
Episode: 8 | tied
Episode: 9 | tied
Episode: 10 | tied
Episode: 11 | tied
Episode: 12 | tied
Episode: 13 | tied
Episode: 14 | tied
Episode: 15 | tied
Episode: 16 | tied
Episode: 17 | tied
Episode: 18 | player 1 won!
Episode: 19 | tied
Episode: 20 | player 2 won!
Episode: 21 | tied
Episode: 22 | player 2 won!
Episode: 23 | tied
Episode: 24 | tied
Episode: 25 | player 1 won!
Episode: 26 | tied
Episode: 27 | tied
Episode: 28 | player 1 won!
Episode: 29 | player 1 won!
Episode: 30 | player 2 won!
Episode: 31 | player 1 won!
Episode: 32 | player 1 won!
Episode: 33 | player 1 won!
Episode: 34 | tied
Episode: 35 | player 1 won!
Episode: 36 | tied
Episode: 37 | player 2 won!
Episode: 38 | tied
Episode: 39 | player 1 won!
Episode: 40 | player 1 won!
Episode: 41 | player 1 won!
Episode: 42 | player 1 won!
Episode: 43 | tied
Episode: 44 | player 1 won!
Ep

Episode: 312 | tied
Episode: 313 | player 1 won!
Episode: 314 | tied
Episode: 315 | player 1 won!
Episode: 316 | player 2 won!
Episode: 317 | player 1 won!
Episode: 318 | player 2 won!
Episode: 319 | player 1 won!
Episode: 320 | player 1 won!
Episode: 321 | player 1 won!
Episode: 322 | player 1 won!
Episode: 323 | player 1 won!
Episode: 324 | player 2 won!
Episode: 325 | player 1 won!
Episode: 326 | player 1 won!
Episode: 327 | tied
Episode: 328 | player 2 won!
Episode: 329 | tied
Episode: 330 | player 1 won!
Episode: 331 | player 1 won!
Episode: 332 | player 1 won!
Episode: 333 | player 1 won!
Episode: 334 | player 1 won!
Episode: 335 | player 2 won!
Episode: 336 | player 1 won!
Episode: 337 | player 2 won!
Episode: 338 | tied
Episode: 339 | player 2 won!
Episode: 340 | player 1 won!
Episode: 341 | player 2 won!
Episode: 342 | tied
Episode: 343 | player 1 won!
Episode: 344 | player 1 won!
Episode: 345 | player 1 won!
Episode: 346 | tied
Episode: 347 | player 1 won!
Episode: 348 | play

Episode: 609 | player 1 won!
Episode: 610 | player 1 won!
Episode: 611 | player 2 won!
Episode: 612 | player 2 won!
Episode: 613 | tied
Episode: 614 | player 1 won!
Episode: 615 | tied
Episode: 616 | tied
Episode: 617 | player 1 won!
Episode: 618 | player 1 won!
Episode: 619 | player 2 won!
Episode: 620 | player 1 won!
Episode: 621 | player 2 won!
Episode: 622 | player 2 won!
Episode: 623 | player 1 won!
Episode: 624 | player 1 won!
Episode: 625 | player 2 won!
Episode: 626 | player 2 won!
Episode: 627 | player 2 won!
Episode: 628 | player 1 won!
Episode: 629 | player 1 won!
Episode: 630 | player 2 won!
Episode: 631 | player 1 won!
Episode: 632 | player 1 won!
Episode: 633 | player 1 won!
Episode: 634 | player 1 won!
Episode: 635 | player 1 won!
Episode: 636 | player 2 won!
Episode: 637 | player 1 won!
Episode: 638 | player 2 won!
Episode: 639 | player 1 won!
Episode: 640 | player 1 won!
Episode: 641 | player 1 won!
Episode: 642 | tied
Episode: 643 | player 1 won!
Episode: 644 | player

Episode: 906 | player 2 won!
Episode: 907 | player 1 won!
Episode: 908 | player 2 won!
Episode: 909 | player 1 won!
Episode: 910 | player 1 won!
Episode: 911 | player 2 won!
Episode: 912 | player 1 won!
Episode: 913 | player 2 won!
Episode: 914 | player 1 won!
Episode: 915 | player 2 won!
Episode: 916 | player 2 won!
Episode: 917 | player 1 won!
Episode: 918 | player 1 won!
Episode: 919 | player 1 won!
Episode: 920 | player 1 won!
Episode: 921 | player 1 won!
Episode: 922 | player 1 won!
Episode: 923 | player 1 won!
Episode: 924 | player 2 won!
Episode: 925 | player 1 won!
Episode: 926 | player 1 won!
Episode: 927 | player 1 won!
Episode: 928 | player 1 won!
Episode: 929 | player 2 won!
Episode: 930 | player 2 won!
Episode: 931 | player 1 won!
Episode: 932 | tied
Episode: 933 | player 2 won!
Episode: 934 | player 1 won!
Episode: 935 | player 1 won!
Episode: 936 | player 1 won!
Episode: 937 | player 2 won!
Episode: 938 | player 1 won!
Episode: 939 | player 1 won!
Episode: 940 | player 1

Episode: 1192 | player 1 won!
Episode: 1193 | player 1 won!
Episode: 1194 | tied
Episode: 1195 | player 2 won!
Episode: 1196 | player 2 won!
Episode: 1197 | player 1 won!
Episode: 1198 | player 1 won!
Episode: 1199 | player 1 won!
Episode: 1200 | player 2 won!
Episode: 1201 | player 1 won!
Episode: 1202 | player 2 won!
Episode: 1203 | player 1 won!
Episode: 1204 | player 1 won!
Episode: 1205 | player 2 won!
Episode: 1206 | player 2 won!
Episode: 1207 | player 1 won!
Episode: 1208 | tied
Episode: 1209 | player 2 won!
Episode: 1210 | player 2 won!
Episode: 1211 | player 2 won!
Episode: 1212 | player 1 won!
Episode: 1213 | player 1 won!
Episode: 1214 | player 1 won!
Episode: 1215 | tied
Episode: 1216 | tied
Episode: 1217 | player 2 won!
Episode: 1218 | player 1 won!
Episode: 1219 | player 2 won!
Episode: 1220 | tied
Episode: 1221 | player 1 won!
Episode: 1222 | player 1 won!
Episode: 1223 | player 1 won!
Episode: 1224 | player 1 won!
Episode: 1225 | player 2 won!
Episode: 1226 | tied
Epis

Episode: 1474 | player 1 won!
Episode: 1475 | player 1 won!
Episode: 1476 | player 2 won!
Episode: 1477 | player 1 won!
Episode: 1478 | player 2 won!
Episode: 1479 | player 2 won!
Episode: 1480 | player 1 won!
Episode: 1481 | player 2 won!
Episode: 1482 | tied
Episode: 1483 | player 1 won!
Episode: 1484 | player 2 won!
Episode: 1485 | player 2 won!
Episode: 1486 | tied
Episode: 1487 | player 1 won!
Episode: 1488 | player 2 won!
Episode: 1489 | player 1 won!
Episode: 1490 | tied
Episode: 1491 | player 1 won!
Episode: 1492 | player 1 won!
Episode: 1493 | player 1 won!
Episode: 1494 | player 1 won!
Episode: 1495 | player 1 won!
Episode: 1496 | player 2 won!
Episode: 1497 | player 2 won!
Episode: 1498 | player 1 won!
Episode: 1499 | player 1 won!
Episode: 1500 | player 1 won!
Episode: 1501 | tied
Episode: 1502 | player 2 won!
Episode: 1503 | player 1 won!
Episode: 1504 | player 1 won!
Episode: 1505 | player 1 won!
Episode: 1506 | player 1 won!
Episode: 1507 | player 1 won!
Episode: 1508 | 

Episode: 1757 | player 2 won!
Episode: 1758 | player 1 won!
Episode: 1759 | player 2 won!
Episode: 1760 | player 2 won!
Episode: 1761 | player 1 won!
Episode: 1762 | player 2 won!
Episode: 1763 | player 1 won!
Episode: 1764 | player 1 won!
Episode: 1765 | player 2 won!
Episode: 1766 | player 1 won!
Episode: 1767 | player 2 won!
Episode: 1768 | player 1 won!
Episode: 1769 | player 1 won!
Episode: 1770 | player 1 won!
Episode: 1771 | player 1 won!
Episode: 1772 | player 1 won!
Episode: 1773 | player 1 won!
Episode: 1774 | player 1 won!
Episode: 1775 | player 1 won!
Episode: 1776 | player 2 won!
Episode: 1777 | player 1 won!
Episode: 1778 | player 2 won!
Episode: 1779 | tied
Episode: 1780 | player 2 won!
Episode: 1781 | player 1 won!
Episode: 1782 | tied
Episode: 1783 | player 2 won!
Episode: 1784 | player 2 won!
Episode: 1785 | player 1 won!
Episode: 1786 | player 2 won!
Episode: 1787 | player 2 won!
Episode: 1788 | player 1 won!
Episode: 1789 | player 1 won!
Episode: 1790 | player 1 won

Episode: 2039 | player 1 won!
Episode: 2040 | player 2 won!
Episode: 2041 | player 1 won!
Episode: 2042 | player 1 won!
Episode: 2043 | player 1 won!
Episode: 2044 | player 2 won!
Episode: 2045 | player 1 won!
Episode: 2046 | player 1 won!
Episode: 2047 | player 1 won!
Episode: 2048 | player 1 won!
Episode: 2049 | player 1 won!
Episode: 2050 | player 1 won!
Episode: 2051 | tied
Episode: 2052 | tied
Episode: 2053 | player 1 won!
Episode: 2054 | player 1 won!
Episode: 2055 | player 1 won!
Episode: 2056 | player 1 won!
Episode: 2057 | player 2 won!
Episode: 2058 | tied
Episode: 2059 | player 1 won!
Episode: 2060 | player 1 won!
Episode: 2061 | player 2 won!
Episode: 2062 | tied
Episode: 2063 | player 1 won!
Episode: 2064 | tied
Episode: 2065 | player 2 won!
Episode: 2066 | player 2 won!
Episode: 2067 | player 1 won!
Episode: 2068 | player 1 won!
Episode: 2069 | player 1 won!
Episode: 2070 | player 1 won!
Episode: 2071 | player 1 won!
Episode: 2072 | player 1 won!
Episode: 2073 | player 1 

Episode: 2325 | player 1 won!
Episode: 2326 | player 2 won!
Episode: 2327 | player 1 won!
Episode: 2328 | player 1 won!
Episode: 2329 | player 2 won!
Episode: 2330 | player 1 won!
Episode: 2331 | player 1 won!
Episode: 2332 | player 1 won!
Episode: 2333 | tied
Episode: 2334 | player 2 won!
Episode: 2335 | player 1 won!
Episode: 2336 | player 2 won!
Episode: 2337 | player 2 won!
Episode: 2338 | player 2 won!
Episode: 2339 | player 1 won!
Episode: 2340 | tied
Episode: 2341 | player 1 won!
Episode: 2342 | player 2 won!
Episode: 2343 | player 2 won!
Episode: 2344 | player 1 won!
Episode: 2345 | player 1 won!
Episode: 2346 | player 1 won!
Episode: 2347 | player 1 won!
Episode: 2348 | player 1 won!
Episode: 2349 | player 2 won!
Episode: 2350 | tied
Episode: 2351 | player 1 won!
Episode: 2352 | player 2 won!
Episode: 2353 | player 2 won!
Episode: 2354 | player 1 won!
Episode: 2355 | player 2 won!
Episode: 2356 | tied
Episode: 2357 | player 1 won!
Episode: 2358 | player 1 won!
Episode: 2359 | 

Episode: 2608 | player 2 won!
Episode: 2609 | player 1 won!
Episode: 2610 | player 1 won!
Episode: 2611 | player 1 won!
Episode: 2612 | player 1 won!
Episode: 2613 | player 1 won!
Episode: 2614 | player 1 won!
Episode: 2615 | player 1 won!
Episode: 2616 | player 1 won!
Episode: 2617 | player 1 won!
Episode: 2618 | player 1 won!
Episode: 2619 | player 1 won!
Episode: 2620 | tied
Episode: 2621 | player 2 won!
Episode: 2622 | player 1 won!
Episode: 2623 | player 1 won!
Episode: 2624 | player 2 won!
Episode: 2625 | player 1 won!
Episode: 2626 | player 2 won!
Episode: 2627 | player 2 won!
Episode: 2628 | player 1 won!
Episode: 2629 | player 1 won!
Episode: 2630 | player 1 won!
Episode: 2631 | player 1 won!
Episode: 2632 | player 1 won!
Episode: 2633 | player 1 won!
Episode: 2634 | player 1 won!
Episode: 2635 | player 2 won!
Episode: 2636 | player 2 won!
Episode: 2637 | player 1 won!
Episode: 2638 | player 2 won!
Episode: 2639 | player 1 won!
Episode: 2640 | player 1 won!
Episode: 2641 | pla

Episode: 2893 | player 1 won!
Episode: 2894 | player 2 won!
Episode: 2895 | player 1 won!
Episode: 2896 | player 1 won!
Episode: 2897 | player 1 won!
Episode: 2898 | player 1 won!
Episode: 2899 | player 2 won!
Episode: 2900 | player 1 won!
Episode: 2901 | player 1 won!
Episode: 2902 | player 2 won!
Episode: 2903 | player 1 won!
Episode: 2904 | tied
Episode: 2905 | player 1 won!
Episode: 2906 | player 1 won!
Episode: 2907 | player 1 won!
Episode: 2908 | player 2 won!
Episode: 2909 | tied
Episode: 2910 | player 1 won!
Episode: 2911 | player 2 won!
Episode: 2912 | player 1 won!
Episode: 2913 | player 1 won!
Episode: 2914 | player 1 won!
Episode: 2915 | player 1 won!
Episode: 2916 | player 2 won!
Episode: 2917 | tied
Episode: 2918 | player 1 won!
Episode: 2919 | tied
Episode: 2920 | tied
Episode: 2921 | player 2 won!
Episode: 2922 | player 1 won!
Episode: 2923 | player 1 won!
Episode: 2924 | player 1 won!
Episode: 2925 | player 1 won!
Episode: 2926 | player 1 won!
Episode: 2927 | player 1 

Episode: 3175 | player 1 won!
Episode: 3176 | player 1 won!
Episode: 3177 | player 1 won!
Episode: 3178 | player 1 won!
Episode: 3179 | player 1 won!
Episode: 3180 | player 1 won!
Episode: 3181 | player 1 won!
Episode: 3182 | player 1 won!
Episode: 3183 | player 1 won!
Episode: 3184 | player 2 won!
Episode: 3185 | player 1 won!
Episode: 3186 | player 2 won!
Episode: 3187 | tied
Episode: 3188 | player 1 won!
Episode: 3189 | player 2 won!
Episode: 3190 | player 2 won!
Episode: 3191 | tied
Episode: 3192 | player 1 won!
Episode: 3193 | player 2 won!
Episode: 3194 | player 2 won!
Episode: 3195 | player 2 won!
Episode: 3196 | player 1 won!
Episode: 3197 | player 1 won!
Episode: 3198 | player 1 won!
Episode: 3199 | player 1 won!
Episode: 3200 | player 1 won!
Episode: 3201 | tied
Episode: 3202 | player 1 won!
Episode: 3203 | player 1 won!
Episode: 3204 | player 2 won!
Episode: 3205 | player 1 won!
Episode: 3206 | player 1 won!
Episode: 3207 | player 2 won!
Episode: 3208 | player 1 won!
Episode

Episode: 3457 | player 1 won!
Episode: 3458 | player 1 won!
Episode: 3459 | player 1 won!
Episode: 3460 | player 2 won!
Episode: 3461 | player 1 won!
Episode: 3462 | player 1 won!
Episode: 3463 | player 1 won!
Episode: 3464 | player 2 won!
Episode: 3465 | player 1 won!
Episode: 3466 | player 1 won!
Episode: 3467 | player 2 won!
Episode: 3468 | player 1 won!
Episode: 3469 | player 1 won!
Episode: 3470 | player 1 won!
Episode: 3471 | player 2 won!
Episode: 3472 | player 2 won!
Episode: 3473 | player 2 won!
Episode: 3474 | player 1 won!
Episode: 3475 | player 1 won!
Episode: 3476 | player 1 won!
Episode: 3477 | player 1 won!
Episode: 3478 | player 1 won!
Episode: 3479 | player 1 won!
Episode: 3480 | player 2 won!
Episode: 3481 | player 2 won!
Episode: 3482 | player 1 won!
Episode: 3483 | player 1 won!
Episode: 3484 | player 2 won!
Episode: 3485 | player 2 won!
Episode: 3486 | player 1 won!
Episode: 3487 | player 1 won!
Episode: 3488 | player 2 won!
Episode: 3489 | player 1 won!
Episode: 3

Episode: 3737 | player 1 won!
Episode: 3738 | player 1 won!
Episode: 3739 | player 1 won!
Episode: 3740 | player 2 won!
Episode: 3741 | player 1 won!
Episode: 3742 | player 1 won!
Episode: 3743 | player 1 won!
Episode: 3744 | player 1 won!
Episode: 3745 | player 1 won!
Episode: 3746 | player 2 won!
Episode: 3747 | player 1 won!
Episode: 3748 | player 2 won!
Episode: 3749 | player 1 won!
Episode: 3750 | player 2 won!
Episode: 3751 | player 1 won!
Episode: 3752 | player 1 won!
Episode: 3753 | player 1 won!
Episode: 3754 | player 2 won!
Episode: 3755 | player 2 won!
Episode: 3756 | player 1 won!
Episode: 3757 | player 1 won!
Episode: 3758 | player 1 won!
Episode: 3759 | player 1 won!
Episode: 3760 | player 2 won!
Episode: 3761 | tied
Episode: 3762 | player 1 won!
Episode: 3763 | player 1 won!
Episode: 3764 | player 2 won!
Episode: 3765 | player 1 won!
Episode: 3766 | player 1 won!
Episode: 3767 | player 1 won!
Episode: 3768 | player 1 won!
Episode: 3769 | player 1 won!
Episode: 3770 | pla

Episode: 4017 | player 1 won!
Episode: 4018 | player 2 won!
Episode: 4019 | player 1 won!
Episode: 4020 | player 1 won!
Episode: 4021 | player 1 won!
Episode: 4022 | player 1 won!
Episode: 4023 | player 1 won!
Episode: 4024 | player 1 won!
Episode: 4025 | player 1 won!
Episode: 4026 | player 2 won!
Episode: 4027 | player 1 won!
Episode: 4028 | player 1 won!
Episode: 4029 | player 1 won!
Episode: 4030 | player 1 won!
Episode: 4031 | player 1 won!
Episode: 4032 | player 1 won!
Episode: 4033 | player 1 won!
Episode: 4034 | player 2 won!
Episode: 4035 | player 1 won!
Episode: 4036 | player 1 won!
Episode: 4037 | player 2 won!
Episode: 4038 | player 1 won!
Episode: 4039 | player 1 won!
Episode: 4040 | tied
Episode: 4041 | player 2 won!
Episode: 4042 | player 2 won!
Episode: 4043 | player 2 won!
Episode: 4044 | player 1 won!
Episode: 4045 | player 1 won!
Episode: 4046 | player 1 won!
Episode: 4047 | player 1 won!
Episode: 4048 | player 1 won!
Episode: 4049 | player 2 won!
Episode: 4050 | pla

Episode: 4301 | player 2 won!
Episode: 4302 | player 2 won!
Episode: 4303 | player 2 won!
Episode: 4304 | tied
Episode: 4305 | player 2 won!
Episode: 4306 | player 1 won!
Episode: 4307 | player 1 won!
Episode: 4308 | player 1 won!
Episode: 4309 | player 1 won!
Episode: 4310 | player 2 won!
Episode: 4311 | tied
Episode: 4312 | player 1 won!
Episode: 4313 | player 2 won!
Episode: 4314 | player 1 won!
Episode: 4315 | player 1 won!
Episode: 4316 | player 1 won!
Episode: 4317 | player 1 won!
Episode: 4318 | player 2 won!
Episode: 4319 | player 1 won!
Episode: 4320 | player 2 won!
Episode: 4321 | player 2 won!
Episode: 4322 | player 1 won!
Episode: 4323 | player 1 won!
Episode: 4324 | player 1 won!
Episode: 4325 | player 1 won!
Episode: 4326 | player 1 won!
Episode: 4327 | player 1 won!
Episode: 4328 | player 1 won!
Episode: 4329 | player 1 won!
Episode: 4330 | player 2 won!
Episode: 4331 | tied
Episode: 4332 | player 1 won!
Episode: 4333 | player 2 won!
Episode: 4334 | player 1 won!
Episode

Episode: 4582 | player 1 won!
Episode: 4583 | player 1 won!
Episode: 4584 | player 1 won!
Episode: 4585 | player 2 won!
Episode: 4586 | player 2 won!
Episode: 4587 | player 1 won!
Episode: 4588 | player 2 won!
Episode: 4589 | player 2 won!
Episode: 4590 | player 1 won!
Episode: 4591 | player 1 won!
Episode: 4592 | tied
Episode: 4593 | player 1 won!
Episode: 4594 | player 2 won!
Episode: 4595 | player 1 won!
Episode: 4596 | player 1 won!
Episode: 4597 | player 1 won!
Episode: 4598 | tied
Episode: 4599 | player 1 won!
Episode: 4600 | player 1 won!
Episode: 4601 | player 2 won!
Episode: 4602 | player 1 won!
Episode: 4603 | player 1 won!
Episode: 4604 | player 1 won!
Episode: 4605 | player 1 won!
Episode: 4606 | player 2 won!
Episode: 4607 | player 1 won!
Episode: 4608 | player 1 won!
Episode: 4609 | player 1 won!
Episode: 4610 | player 1 won!
Episode: 4611 | player 1 won!
Episode: 4612 | player 2 won!
Episode: 4613 | player 2 won!
Episode: 4614 | player 2 won!
Episode: 4615 | player 1 won

Episode: 4867 | player 1 won!
Episode: 4868 | player 2 won!
Episode: 4869 | player 1 won!
Episode: 4870 | player 2 won!
Episode: 4871 | tied
Episode: 4872 | player 2 won!
Episode: 4873 | player 1 won!
Episode: 4874 | player 1 won!
Episode: 4875 | player 1 won!
Episode: 4876 | player 1 won!
Episode: 4877 | player 1 won!
Episode: 4878 | player 1 won!
Episode: 4879 | player 1 won!
Episode: 4880 | player 1 won!
Episode: 4881 | player 1 won!
Episode: 4882 | player 1 won!
Episode: 4883 | player 2 won!
Episode: 4884 | player 1 won!
Episode: 4885 | player 2 won!
Episode: 4886 | tied
Episode: 4887 | player 2 won!
Episode: 4888 | player 1 won!
Episode: 4889 | player 1 won!
Episode: 4890 | player 2 won!
Episode: 4891 | player 1 won!
Episode: 4892 | player 1 won!
Episode: 4893 | player 2 won!
Episode: 4894 | player 1 won!
Episode: 4895 | player 1 won!
Episode: 4896 | player 1 won!
Episode: 4897 | player 1 won!
Episode: 4898 | player 1 won!
Episode: 4899 | player 1 won!
Episode: 4900 | player 2 won

Episode: 5146 | player 1 won!
Episode: 5147 | player 1 won!
Episode: 5148 | player 2 won!
Episode: 5149 | player 1 won!
Episode: 5150 | player 2 won!
Episode: 5151 | player 1 won!
Episode: 5152 | player 1 won!
Episode: 5153 | player 1 won!
Episode: 5154 | player 1 won!
Episode: 5155 | tied
Episode: 5156 | player 1 won!
Episode: 5157 | player 2 won!
Episode: 5158 | player 2 won!
Episode: 5159 | player 2 won!
Episode: 5160 | player 1 won!
Episode: 5161 | player 1 won!
Episode: 5162 | player 1 won!
Episode: 5163 | player 1 won!
Episode: 5164 | player 1 won!
Episode: 5165 | tied
Episode: 5166 | player 2 won!
Episode: 5167 | player 1 won!
Episode: 5168 | player 1 won!
Episode: 5169 | player 2 won!
Episode: 5170 | player 1 won!
Episode: 5171 | player 1 won!
Episode: 5172 | player 2 won!
Episode: 5173 | player 2 won!
Episode: 5174 | player 1 won!
Episode: 5175 | player 1 won!
Episode: 5176 | player 2 won!
Episode: 5177 | player 1 won!
Episode: 5178 | player 2 won!
Episode: 5179 | player 1 won

Episode: 5425 | player 1 won!
Episode: 5426 | player 2 won!
Episode: 5427 | player 1 won!
Episode: 5428 | player 2 won!
Episode: 5429 | player 1 won!
Episode: 5430 | player 1 won!
Episode: 5431 | player 1 won!
Episode: 5432 | player 2 won!
Episode: 5433 | player 2 won!
Episode: 5434 | player 1 won!
Episode: 5435 | player 1 won!
Episode: 5436 | player 2 won!
Episode: 5437 | player 1 won!
Episode: 5438 | player 1 won!
Episode: 5439 | player 2 won!
Episode: 5440 | player 1 won!
Episode: 5441 | player 2 won!
Episode: 5442 | player 1 won!
Episode: 5443 | player 1 won!
Episode: 5444 | player 1 won!
Episode: 5445 | tied
Episode: 5446 | player 2 won!
Episode: 5447 | player 1 won!
Episode: 5448 | player 1 won!
Episode: 5449 | player 2 won!
Episode: 5450 | player 1 won!
Episode: 5451 | player 1 won!
Episode: 5452 | player 2 won!
Episode: 5453 | player 2 won!
Episode: 5454 | player 1 won!
Episode: 5455 | player 1 won!
Episode: 5456 | player 1 won!
Episode: 5457 | player 2 won!
Episode: 5458 | pla

Episode: 5702 | player 2 won!
Episode: 5703 | player 2 won!
Episode: 5704 | player 1 won!
Episode: 5705 | player 1 won!
Episode: 5706 | player 1 won!
Episode: 5707 | player 1 won!
Episode: 5708 | player 1 won!
Episode: 5709 | tied
Episode: 5710 | tied
Episode: 5711 | player 2 won!
Episode: 5712 | player 1 won!
Episode: 5713 | player 1 won!
Episode: 5714 | player 1 won!
Episode: 5715 | tied
Episode: 5716 | player 1 won!
Episode: 5717 | player 1 won!
Episode: 5718 | player 1 won!
Episode: 5719 | player 1 won!
Episode: 5720 | player 1 won!
Episode: 5721 | player 1 won!
Episode: 5722 | player 2 won!
Episode: 5723 | player 2 won!
Episode: 5724 | player 1 won!
Episode: 5725 | player 1 won!
Episode: 5726 | player 1 won!
Episode: 5727 | player 1 won!
Episode: 5728 | player 1 won!
Episode: 5729 | player 1 won!
Episode: 5730 | tied
Episode: 5731 | tied
Episode: 5732 | player 1 won!
Episode: 5733 | player 1 won!
Episode: 5734 | player 1 won!
Episode: 5735 | player 1 won!
Episode: 5736 | player 1 

Episode: 5987 | player 1 won!
Episode: 5988 | player 1 won!
Episode: 5989 | player 1 won!
Episode: 5990 | tied
Episode: 5991 | player 1 won!
Episode: 5992 | player 1 won!
Episode: 5993 | player 1 won!
Episode: 5994 | player 1 won!
Episode: 5995 | player 2 won!
Episode: 5996 | player 1 won!
Episode: 5997 | player 1 won!
Episode: 5998 | player 1 won!
Episode: 5999 | player 1 won!


# 6. check the Q-table

In [17]:
# Display player 1's Q-table: one row per state number, one column per board cell (action 0-8).
pd.DataFrame(Q1)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,-1.0,-1.0,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000
1,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.0,0.0,-0.910659,-0.968130,-0.911273,-0.910528,-0.911382,-0.911400,-0.911322
4,0.0,0.0,-1.000000,-0.426653,-1.000000,-0.361132,-0.407334,-0.385297,-0.449191
...,...,...,...,...,...,...,...,...,...
5473,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5474,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5475,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5476,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
