In [None]:
import collections
import numpy as np
import tensorflow as tf

from tic_tac_toe import play_game, random_player

# Network shape: the board is fed in flattened (3 * 3 values) and the network
# emits one output per board cell.
INPUT_NODES = 3 * 3
OUTPUT_NODES = INPUT_NODES
HIDDEN_NODES = (100, 100, 100)  # width of each of the three hidden layers

# Training schedule.
LEARN_RATE = 1e-4
BATCH_SIZE = 100  # a training step runs each time the episode counter reaches a multiple of this
PRINT_RESULTS_EVERY_X = 1000  # results are printed each time the episode counter reaches a multiple of this

# Run-time inputs: flattened board states, one scalar reward per recorded
# move, and the vector describing the move that was actually sampled.
input_placeholder = tf.placeholder(tf.float32, shape=(None, INPUT_NODES))
reward_placeholder = tf.placeholder(tf.float32, shape=(None,))
actual_move_placeholder = tf.placeholder(tf.float32, shape=(None, OUTPUT_NODES))

# Weight matrices. The hidden layers scale their initial spread by
# 1 / sqrt(fan-in); the output layer scales by 1 / sqrt(OUTPUT_NODES).
hidden_weights_1 = tf.Variable(
    tf.truncated_normal((INPUT_NODES, HIDDEN_NODES[0]), stddev=1. / np.sqrt(INPUT_NODES)))
hidden_weights_2 = tf.Variable(
    tf.truncated_normal((HIDDEN_NODES[0], HIDDEN_NODES[1]), stddev=1. / np.sqrt(HIDDEN_NODES[0])))
hidden_weights_3 = tf.Variable(
    tf.truncated_normal((HIDDEN_NODES[1], HIDDEN_NODES[2]), stddev=1. / np.sqrt(HIDDEN_NODES[1])))
output_weights = tf.Variable(
    tf.truncated_normal((HIDDEN_NODES[-1], OUTPUT_NODES), stddev=1. / np.sqrt(OUTPUT_NODES)))

# Three ReLU hidden layers, each with a bias vector initialised to 0.01.
hidden_product_1 = tf.matmul(input_placeholder, hidden_weights_1)
hidden_bias_1 = tf.Variable(tf.constant(0.01, shape=(HIDDEN_NODES[0],)))
hidden_layer_1 = tf.nn.relu(hidden_product_1 + hidden_bias_1)

hidden_product_2 = tf.matmul(hidden_layer_1, hidden_weights_2)
hidden_bias_2 = tf.Variable(tf.constant(0.01, shape=(HIDDEN_NODES[1],)))
hidden_layer_2 = tf.nn.relu(hidden_product_2 + hidden_bias_2)

hidden_product_3 = tf.matmul(hidden_layer_2, hidden_weights_3)
hidden_bias_3 = tf.Variable(tf.constant(0.01, shape=(HIDDEN_NODES[2],)))
hidden_layer_3 = tf.nn.relu(hidden_product_3 + hidden_bias_3)

# Softmax output: a probability for each of the OUTPUT_NODES moves.
output_product = tf.matmul(hidden_layer_3, output_weights)
output_bias = tf.Variable(tf.constant(0.01, shape=(OUTPUT_NODES,)))
output_layer = tf.nn.softmax(output_product + output_bias)

# Objective: the reward-weighted probability assigned to the moves that were
# actually played, summed over the batch. The optimizer minimises its
# negation, i.e. it maximises this quantity.
reward_column = tf.reshape(reward_placeholder, (-1, 1))
policy_gradient = tf.reduce_sum(reward_column * actual_move_placeholder * output_layer)
optimizer = tf.train.RMSPropOptimizer(LEARN_RATE)
train_step = optimizer.minimize(-policy_gradient)

sess = tf.Session()
# tf.initialize_all_variables() is deprecated; TensorFlow's own deprecation
# notice names tf.global_variables_initializer() as the replacement.
sess.run(tf.global_variables_initializer())

# Rollout buffers for the current training batch. make_move appends to
# board_states and actual_moves; the training loop appends to rewards and
# clears all three after each training step.
board_states, actual_moves, rewards = [], [], []
# Episode counter used to schedule training steps and result printing.
episode_number = 1
# Sliding window of recent game results used for the printed statistic.
results = collections.deque()


def make_move(board_state, side):
    """Sample a move from the policy network and record it for training.

    The flattened board is appended to ``board_states`` and the sampled
    one-hot move vector to ``actual_moves`` so the training loop can later
    pair them with rewards.

    Args:
        board_state: The current board; it is flattened with ``np.ravel``
            before being fed to the network.
        side: Unused here; accepted because the caller passes it.

    Returns:
        A ``(row, column)`` pair of integers obtained by splitting the
        sampled flat index over a board three cells wide.
    """
    board_state_flat = np.ravel(board_state)
    board_states.append(board_state_flat)
    probability_of_actions = sess.run(output_layer, feed_dict={input_placeholder: [board_state_flat]})[0]

    try:
        move = np.random.multinomial(1, probability_of_actions)
    except ValueError:
        # Rounding error can push the sum of probability_of_actions above 1,
        # which multinomial rejects; scale the values down slightly so they
        # form a valid distribution.
        move = np.random.multinomial(1, probability_of_actions / (sum(probability_of_actions) + 1e-7))

    actual_moves.append(move)

    move_index = move.argmax()
    # Floor division keeps the row an integer. With "/" Python 3 performs true
    # division, so the row became a float (e.g. 4 / 3 -> 1.33) and was
    # non-integral for every cell outside the first column.
    return move_index // 3, move_index % 3


# Main training loop: play games forever, training on every batch of episodes.
while True:
    # Play one game against the random opponent; make_move records every
    # board state and sampled move along the way.
    reward = play_game(make_move, random_player)

    # Keep at most PRINT_RESULTS_EVERY_X recent results for the statistic.
    results.append(reward)
    if len(results) > PRINT_RESULTS_EVERY_X:
        results.popleft()

    # Number of moves recorded during the game that just finished.
    last_game_length = len(board_states) - len(rewards)

    # Scale the reward by game length so that, when winning, faster wins are
    # preferred and, when losing, later losses are preferred.
    reward /= float(last_game_length)

    # Every move of the game receives the same scaled reward.
    rewards += ([reward] * last_game_length)

    episode_number += 1

    if episode_number % BATCH_SIZE == 0:
        normalized_rewards = rewards - np.mean(rewards)
        reward_std = np.std(normalized_rewards)
        # If every reward in the batch is identical the standard deviation is
        # 0; dividing would produce NaN rewards that get fed to the optimizer.
        # The centred rewards are already all zero in that case, so they are
        # left as they are.
        if reward_std > 0:
            normalized_rewards /= reward_std

        sess.run(train_step, feed_dict={input_placeholder: board_states,
                                        reward_placeholder: normalized_rewards,
                                        actual_move_placeholder: actual_moves})

        # Clear the batch buffers in place (make_move holds references to them).
        del board_states[:]
        del actual_moves[:]
        del rewards[:]

    if episode_number % PRINT_RESULTS_EVERY_X == 0:
        # The first value printed is the episode counter, so it is labelled
        # as such (the previous label called it the player). The average is
        # taken over the games actually held in the window, which is not yet
        # full at the first report.
        # NOTE(review): the 0.5 + mean / 2 mapping reads as a win rate only if
        # play_game returns +1 / -1 for win / loss -- confirm against play_game.
        print("episode: %s win_rate: %s" % (episode_number, 0.5 + sum(results) / (len(results) * 2.)))

Instructions for updating:
Use `tf.global_variables_initializer` instead.
episode: 1000 win_rate: 0.0025000000000000022
episode: 2000 win_rate: 0.0010000000000000009
episode: 3000 win_rate: 0.006000000000000005
episode: 4000 win_rate: 0.0020000000000000018
episode: 5000 win_rate: 0.0040000000000000036
episode: 6000 win_rate: 0.0050000000000000044
episode: 7000 win_rate: 0.008000000000000007
episode: 8000 win_rate: 0.007000000000000006
episode: 9000 win_rate: 0.008000000000000007
episode: 10000 win_rate: 0.01100000000000001
episode: 11000 win_rate: 0.015000000000000013
episode: 12000 win_rate: 0.017000000000000015
episode: 13000 win_rate: 0.013000000000000012
episode: 14000 win_rate: 0.017000000000000015
episode: 15000 win_rate: 0.025000000000000022
episode: 16000 win_rate: 0.020000000000000018
episode: 17000 win_rate: 0.017000000000000015
episode: 18000 win_rate: 0.017000000000000015
episode: 19000 win_rate: 0.01200000000000001
episode: 20000 win_rate: 0.01100000000000001
episode: 2100

episode: 224000 win_rate: 0.6
episode: 225000 win_rate: 0.614
episode: 226000 win_rate: 0.619
episode: 227000 win_rate: 0.5589999999999999
episode: 228000 win_rate: 0.597
episode: 229000 win_rate: 0.585
episode: 230000 win_rate: 0.595
episode: 231000 win_rate: 0.601
episode: 232000 win_rate: 0.597
episode: 233000 win_rate: 0.597
episode: 234000 win_rate: 0.619
episode: 235000 win_rate: 0.601
episode: 236000 win_rate: 0.591
episode: 237000 win_rate: 0.601
episode: 238000 win_rate: 0.577
episode: 239000 win_rate: 0.622
episode: 240000 win_rate: 0.584
episode: 241000 win_rate: 0.606
episode: 242000 win_rate: 0.601
episode: 243000 win_rate: 0.593
episode: 244000 win_rate: 0.589
episode: 245000 win_rate: 0.575
episode: 246000 win_rate: 0.6
episode: 247000 win_rate: 0.589
episode: 248000 win_rate: 0.579
episode: 249000 win_rate: 0.606
episode: 250000 win_rate: 0.612
episode: 251000 win_rate: 0.579
episode: 252000 win_rate: 0.614
episode: 253000 win_rate: 0.596
episode: 254000 win_rate: 0.606

episode: 475000 win_rate: 0.58
episode: 476000 win_rate: 0.584
episode: 477000 win_rate: 0.574
episode: 478000 win_rate: 0.611
episode: 479000 win_rate: 0.573
episode: 480000 win_rate: 0.603
episode: 481000 win_rate: 0.587
episode: 482000 win_rate: 0.6
episode: 483000 win_rate: 0.611
episode: 484000 win_rate: 0.587
episode: 485000 win_rate: 0.574
episode: 486000 win_rate: 0.589
episode: 487000 win_rate: 0.5700000000000001
episode: 488000 win_rate: 0.555
episode: 489000 win_rate: 0.5589999999999999
episode: 490000 win_rate: 0.599
episode: 491000 win_rate: 0.584
episode: 492000 win_rate: 0.5660000000000001
episode: 493000 win_rate: 0.6
episode: 494000 win_rate: 0.584
episode: 495000 win_rate: 0.603
episode: 496000 win_rate: 0.575
episode: 497000 win_rate: 0.573
episode: 498000 win_rate: 0.583
episode: 499000 win_rate: 0.575
episode: 500000 win_rate: 0.575
episode: 501000 win_rate: 0.588
episode: 502000 win_rate: 0.556
episode: 503000 win_rate: 0.576
episode: 504000 win_rate: 0.546
episod

episode: 725000 win_rate: 0.614
episode: 726000 win_rate: 0.6
episode: 727000 win_rate: 0.599
episode: 728000 win_rate: 0.575
episode: 729000 win_rate: 0.597
episode: 730000 win_rate: 0.628
episode: 731000 win_rate: 0.582
episode: 732000 win_rate: 0.603
episode: 733000 win_rate: 0.586
episode: 734000 win_rate: 0.583
episode: 735000 win_rate: 0.583
episode: 736000 win_rate: 0.621
episode: 737000 win_rate: 0.598
episode: 738000 win_rate: 0.582
episode: 739000 win_rate: 0.609
episode: 740000 win_rate: 0.582
episode: 741000 win_rate: 0.589
episode: 742000 win_rate: 0.598
episode: 743000 win_rate: 0.592
episode: 744000 win_rate: 0.599
episode: 745000 win_rate: 0.627
episode: 746000 win_rate: 0.632
episode: 747000 win_rate: 0.596
episode: 748000 win_rate: 0.604
episode: 749000 win_rate: 0.589
episode: 750000 win_rate: 0.61
episode: 751000 win_rate: 0.626
episode: 752000 win_rate: 0.581
episode: 753000 win_rate: 0.597
episode: 754000 win_rate: 0.585
episode: 755000 win_rate: 0.616
episode: 75

episode: 980000 win_rate: 0.6
episode: 981000 win_rate: 0.593
episode: 982000 win_rate: 0.591
episode: 983000 win_rate: 0.607
episode: 984000 win_rate: 0.591
episode: 985000 win_rate: 0.584
episode: 986000 win_rate: 0.593
episode: 987000 win_rate: 0.598
episode: 988000 win_rate: 0.587
episode: 989000 win_rate: 0.579
episode: 990000 win_rate: 0.616
episode: 991000 win_rate: 0.593
episode: 992000 win_rate: 0.588
episode: 993000 win_rate: 0.623
episode: 994000 win_rate: 0.593
episode: 995000 win_rate: 0.576
episode: 996000 win_rate: 0.554
episode: 997000 win_rate: 0.582
episode: 998000 win_rate: 0.573
episode: 999000 win_rate: 0.582
episode: 1000000 win_rate: 0.585
episode: 1001000 win_rate: 0.581
episode: 1002000 win_rate: 0.5700000000000001
episode: 1003000 win_rate: 0.603
episode: 1004000 win_rate: 0.571
episode: 1005000 win_rate: 0.588
episode: 1006000 win_rate: 0.608
episode: 1007000 win_rate: 0.583
episode: 1008000 win_rate: 0.596
episode: 1009000 win_rate: 0.601
episode: 1010000 wi

episode: 1226000 win_rate: 0.601
episode: 1227000 win_rate: 0.552
episode: 1228000 win_rate: 0.605
episode: 1229000 win_rate: 0.5609999999999999
episode: 1230000 win_rate: 0.612
episode: 1231000 win_rate: 0.569
episode: 1232000 win_rate: 0.607
episode: 1233000 win_rate: 0.576
episode: 1234000 win_rate: 0.596
episode: 1235000 win_rate: 0.593
episode: 1236000 win_rate: 0.596
episode: 1237000 win_rate: 0.593
episode: 1238000 win_rate: 0.601
episode: 1239000 win_rate: 0.607
episode: 1240000 win_rate: 0.576
episode: 1241000 win_rate: 0.578
episode: 1242000 win_rate: 0.597
episode: 1243000 win_rate: 0.602
episode: 1244000 win_rate: 0.579
episode: 1245000 win_rate: 0.589
episode: 1246000 win_rate: 0.592
episode: 1247000 win_rate: 0.579
episode: 1248000 win_rate: 0.592
episode: 1249000 win_rate: 0.604
episode: 1250000 win_rate: 0.604
episode: 1251000 win_rate: 0.584
episode: 1252000 win_rate: 0.621
episode: 1253000 win_rate: 0.589
episode: 1254000 win_rate: 0.577
episode: 1255000 win_rate: 0.5

episode: 1473000 win_rate: 0.562
episode: 1474000 win_rate: 0.591
episode: 1475000 win_rate: 0.627
episode: 1476000 win_rate: 0.596
episode: 1477000 win_rate: 0.619
episode: 1478000 win_rate: 0.573
episode: 1479000 win_rate: 0.607
episode: 1480000 win_rate: 0.601
episode: 1481000 win_rate: 0.597
episode: 1482000 win_rate: 0.592
episode: 1483000 win_rate: 0.6
episode: 1484000 win_rate: 0.613
episode: 1485000 win_rate: 0.576
episode: 1486000 win_rate: 0.592
episode: 1487000 win_rate: 0.625
episode: 1488000 win_rate: 0.583
episode: 1489000 win_rate: 0.61
episode: 1490000 win_rate: 0.59
episode: 1491000 win_rate: 0.615
episode: 1492000 win_rate: 0.599
episode: 1493000 win_rate: 0.602
episode: 1494000 win_rate: 0.614
episode: 1495000 win_rate: 0.553
episode: 1496000 win_rate: 0.598
episode: 1497000 win_rate: 0.595
episode: 1498000 win_rate: 0.619
episode: 1499000 win_rate: 0.583
episode: 1500000 win_rate: 0.612
episode: 1501000 win_rate: 0.598
episode: 1502000 win_rate: 0.577
episode: 15030

episode: 1721000 win_rate: 0.589
episode: 1722000 win_rate: 0.609
episode: 1723000 win_rate: 0.592
episode: 1724000 win_rate: 0.611
episode: 1725000 win_rate: 0.618
episode: 1726000 win_rate: 0.609
episode: 1727000 win_rate: 0.602
episode: 1728000 win_rate: 0.59
episode: 1729000 win_rate: 0.577
episode: 1730000 win_rate: 0.628
episode: 1731000 win_rate: 0.579
episode: 1732000 win_rate: 0.606
episode: 1733000 win_rate: 0.633
episode: 1734000 win_rate: 0.606
episode: 1735000 win_rate: 0.632
episode: 1736000 win_rate: 0.621
episode: 1737000 win_rate: 0.5700000000000001
episode: 1738000 win_rate: 0.589
episode: 1739000 win_rate: 0.591
episode: 1740000 win_rate: 0.583
episode: 1741000 win_rate: 0.591
episode: 1742000 win_rate: 0.604
episode: 1743000 win_rate: 0.595
episode: 1744000 win_rate: 0.571
episode: 1745000 win_rate: 0.582
episode: 1746000 win_rate: 0.584
episode: 1747000 win_rate: 0.612
episode: 1748000 win_rate: 0.591
episode: 1749000 win_rate: 0.605
episode: 1750000 win_rate: 0.58

episode: 1967000 win_rate: 0.622
episode: 1968000 win_rate: 0.596
episode: 1969000 win_rate: 0.589
episode: 1970000 win_rate: 0.5609999999999999
episode: 1971000 win_rate: 0.574
episode: 1972000 win_rate: 0.606
episode: 1973000 win_rate: 0.642
episode: 1974000 win_rate: 0.602
episode: 1975000 win_rate: 0.573
episode: 1976000 win_rate: 0.579
episode: 1977000 win_rate: 0.61
episode: 1978000 win_rate: 0.594
episode: 1979000 win_rate: 0.601
episode: 1980000 win_rate: 0.596
episode: 1981000 win_rate: 0.56
episode: 1982000 win_rate: 0.598
episode: 1983000 win_rate: 0.606
episode: 1984000 win_rate: 0.587
episode: 1985000 win_rate: 0.584
episode: 1986000 win_rate: 0.597
episode: 1987000 win_rate: 0.611
episode: 1988000 win_rate: 0.623
episode: 1989000 win_rate: 0.605
episode: 1990000 win_rate: 0.58
episode: 1991000 win_rate: 0.599
episode: 1992000 win_rate: 0.612
episode: 1993000 win_rate: 0.599
episode: 1994000 win_rate: 0.587
episode: 1995000 win_rate: 0.625
episode: 1996000 win_rate: 0.605


episode: 2214000 win_rate: 0.615
episode: 2215000 win_rate: 0.604
episode: 2216000 win_rate: 0.596
episode: 2217000 win_rate: 0.622
episode: 2218000 win_rate: 0.574
episode: 2219000 win_rate: 0.606
episode: 2220000 win_rate: 0.596
episode: 2221000 win_rate: 0.605
episode: 2222000 win_rate: 0.585
episode: 2223000 win_rate: 0.588
episode: 2224000 win_rate: 0.588
episode: 2225000 win_rate: 0.609
episode: 2226000 win_rate: 0.577
episode: 2227000 win_rate: 0.597
episode: 2228000 win_rate: 0.596
episode: 2229000 win_rate: 0.631
episode: 2230000 win_rate: 0.598
episode: 2231000 win_rate: 0.6
episode: 2232000 win_rate: 0.611
episode: 2233000 win_rate: 0.587
episode: 2234000 win_rate: 0.576
episode: 2235000 win_rate: 0.585
episode: 2236000 win_rate: 0.607
episode: 2237000 win_rate: 0.6
episode: 2238000 win_rate: 0.572
episode: 2239000 win_rate: 0.627
episode: 2240000 win_rate: 0.586
episode: 2241000 win_rate: 0.584
episode: 2242000 win_rate: 0.578
episode: 2243000 win_rate: 0.593
episode: 22440

episode: 2461000 win_rate: 0.594
episode: 2462000 win_rate: 0.578
episode: 2463000 win_rate: 0.627
episode: 2464000 win_rate: 0.579
episode: 2465000 win_rate: 0.61
episode: 2466000 win_rate: 0.613
episode: 2467000 win_rate: 0.573
episode: 2468000 win_rate: 0.592
episode: 2469000 win_rate: 0.616
episode: 2470000 win_rate: 0.609
episode: 2471000 win_rate: 0.595
episode: 2472000 win_rate: 0.588
episode: 2473000 win_rate: 0.591
episode: 2474000 win_rate: 0.614
episode: 2475000 win_rate: 0.609
episode: 2476000 win_rate: 0.598
episode: 2477000 win_rate: 0.573
episode: 2478000 win_rate: 0.597
episode: 2479000 win_rate: 0.577
episode: 2480000 win_rate: 0.633
episode: 2481000 win_rate: 0.628
episode: 2482000 win_rate: 0.595
episode: 2483000 win_rate: 0.605
episode: 2484000 win_rate: 0.584
episode: 2485000 win_rate: 0.597
episode: 2486000 win_rate: 0.595
episode: 2487000 win_rate: 0.611
episode: 2488000 win_rate: 0.608
episode: 2489000 win_rate: 0.586
episode: 2490000 win_rate: 0.567
episode: 24

episode: 2710000 win_rate: 0.563
episode: 2711000 win_rate: 0.601
episode: 2712000 win_rate: 0.591
episode: 2713000 win_rate: 0.591
episode: 2714000 win_rate: 0.609
episode: 2715000 win_rate: 0.574
episode: 2716000 win_rate: 0.594
episode: 2717000 win_rate: 0.601
episode: 2718000 win_rate: 0.607
episode: 2719000 win_rate: 0.595
episode: 2720000 win_rate: 0.58
episode: 2721000 win_rate: 0.605
episode: 2722000 win_rate: 0.602
episode: 2723000 win_rate: 0.5700000000000001
episode: 2724000 win_rate: 0.616
episode: 2725000 win_rate: 0.634
episode: 2726000 win_rate: 0.55
episode: 2727000 win_rate: 0.623
episode: 2728000 win_rate: 0.598
episode: 2729000 win_rate: 0.565
episode: 2730000 win_rate: 0.584
episode: 2731000 win_rate: 0.621
episode: 2732000 win_rate: 0.584
episode: 2733000 win_rate: 0.601
episode: 2734000 win_rate: 0.579
episode: 2735000 win_rate: 0.583
episode: 2736000 win_rate: 0.624
episode: 2737000 win_rate: 0.582
episode: 2738000 win_rate: 0.607
episode: 2739000 win_rate: 0.592

episode: 2957000 win_rate: 0.603
episode: 2958000 win_rate: 0.58
episode: 2959000 win_rate: 0.585
episode: 2960000 win_rate: 0.592
episode: 2961000 win_rate: 0.598
episode: 2962000 win_rate: 0.578
episode: 2963000 win_rate: 0.631
episode: 2964000 win_rate: 0.592
episode: 2965000 win_rate: 0.616
episode: 2966000 win_rate: 0.612
episode: 2967000 win_rate: 0.593
episode: 2968000 win_rate: 0.61
episode: 2969000 win_rate: 0.574
episode: 2970000 win_rate: 0.594
episode: 2971000 win_rate: 0.571
episode: 2972000 win_rate: 0.609
episode: 2973000 win_rate: 0.587
episode: 2974000 win_rate: 0.6
episode: 2975000 win_rate: 0.582
episode: 2976000 win_rate: 0.572
episode: 2977000 win_rate: 0.59
episode: 2978000 win_rate: 0.576
episode: 2979000 win_rate: 0.579
episode: 2980000 win_rate: 0.597
episode: 2981000 win_rate: 0.61
episode: 2982000 win_rate: 0.589
episode: 2983000 win_rate: 0.577
episode: 2984000 win_rate: 0.578
episode: 2985000 win_rate: 0.59
episode: 2986000 win_rate: 0.598
episode: 2987000 

episode: 3204000 win_rate: 0.5660000000000001
episode: 3205000 win_rate: 0.607
episode: 3206000 win_rate: 0.606
episode: 3207000 win_rate: 0.563
episode: 3208000 win_rate: 0.585
episode: 3209000 win_rate: 0.608
episode: 3210000 win_rate: 0.606
episode: 3211000 win_rate: 0.589
episode: 3212000 win_rate: 0.5609999999999999
episode: 3213000 win_rate: 0.581
episode: 3214000 win_rate: 0.612
episode: 3215000 win_rate: 0.592
episode: 3216000 win_rate: 0.59
episode: 3217000 win_rate: 0.603
episode: 3218000 win_rate: 0.579
episode: 3219000 win_rate: 0.603
episode: 3220000 win_rate: 0.601
episode: 3221000 win_rate: 0.581
episode: 3222000 win_rate: 0.567
episode: 3223000 win_rate: 0.581
episode: 3224000 win_rate: 0.59
episode: 3225000 win_rate: 0.589
episode: 3226000 win_rate: 0.617
episode: 3227000 win_rate: 0.586
episode: 3228000 win_rate: 0.628
episode: 3229000 win_rate: 0.59
episode: 3230000 win_rate: 0.596
episode: 3231000 win_rate: 0.605
episode: 3232000 win_rate: 0.612
episode: 3233000 win

episode: 3450000 win_rate: 0.601
episode: 3451000 win_rate: 0.606
episode: 3452000 win_rate: 0.637
episode: 3453000 win_rate: 0.59
episode: 3454000 win_rate: 0.593
episode: 3455000 win_rate: 0.591
episode: 3456000 win_rate: 0.592
episode: 3457000 win_rate: 0.592
episode: 3458000 win_rate: 0.615
episode: 3459000 win_rate: 0.584
episode: 3460000 win_rate: 0.616
episode: 3461000 win_rate: 0.605
episode: 3462000 win_rate: 0.584
episode: 3463000 win_rate: 0.587
episode: 3464000 win_rate: 0.61
episode: 3465000 win_rate: 0.584
episode: 3466000 win_rate: 0.598
episode: 3467000 win_rate: 0.573
episode: 3468000 win_rate: 0.598
episode: 3469000 win_rate: 0.619
episode: 3470000 win_rate: 0.581
episode: 3471000 win_rate: 0.609
episode: 3472000 win_rate: 0.604
episode: 3473000 win_rate: 0.593
episode: 3474000 win_rate: 0.591
episode: 3475000 win_rate: 0.5640000000000001
episode: 3476000 win_rate: 0.598
episode: 3477000 win_rate: 0.592
episode: 3478000 win_rate: 0.6
episode: 3479000 win_rate: 0.612
e

episode: 3698000 win_rate: 0.556
episode: 3699000 win_rate: 0.6
episode: 3700000 win_rate: 0.614
episode: 3701000 win_rate: 0.572
episode: 3702000 win_rate: 0.592
episode: 3703000 win_rate: 0.586
episode: 3704000 win_rate: 0.609
episode: 3705000 win_rate: 0.614
episode: 3706000 win_rate: 0.602
episode: 3707000 win_rate: 0.608
episode: 3708000 win_rate: 0.583
episode: 3709000 win_rate: 0.578
episode: 3710000 win_rate: 0.608
episode: 3711000 win_rate: 0.604
episode: 3712000 win_rate: 0.578
episode: 3713000 win_rate: 0.625
episode: 3714000 win_rate: 0.594
episode: 3715000 win_rate: 0.583
episode: 3716000 win_rate: 0.576
episode: 3717000 win_rate: 0.569
episode: 3718000 win_rate: 0.5660000000000001
episode: 3719000 win_rate: 0.587
episode: 3720000 win_rate: 0.587
episode: 3721000 win_rate: 0.59
episode: 3722000 win_rate: 0.604
episode: 3723000 win_rate: 0.592
episode: 3724000 win_rate: 0.583
episode: 3725000 win_rate: 0.58
episode: 3726000 win_rate: 0.595
episode: 3727000 win_rate: 0.583
e

episode: 3946000 win_rate: 0.583
episode: 3947000 win_rate: 0.597
episode: 3948000 win_rate: 0.598
episode: 3949000 win_rate: 0.602
episode: 3950000 win_rate: 0.578
episode: 3951000 win_rate: 0.613
episode: 3952000 win_rate: 0.584
episode: 3953000 win_rate: 0.618
episode: 3954000 win_rate: 0.601
episode: 3955000 win_rate: 0.631
episode: 3956000 win_rate: 0.593
episode: 3957000 win_rate: 0.59
episode: 3958000 win_rate: 0.599
episode: 3959000 win_rate: 0.605
episode: 3960000 win_rate: 0.602
episode: 3961000 win_rate: 0.587
episode: 3962000 win_rate: 0.621
episode: 3963000 win_rate: 0.555
episode: 3964000 win_rate: 0.613
episode: 3965000 win_rate: 0.609
episode: 3966000 win_rate: 0.576
episode: 3967000 win_rate: 0.58
episode: 3968000 win_rate: 0.555
episode: 3969000 win_rate: 0.612
episode: 3970000 win_rate: 0.614
episode: 3971000 win_rate: 0.583
episode: 3972000 win_rate: 0.601
episode: 3973000 win_rate: 0.575
episode: 3974000 win_rate: 0.577
episode: 3975000 win_rate: 0.606
episode: 397

episode: 4193000 win_rate: 0.613
episode: 4194000 win_rate: 0.579
episode: 4195000 win_rate: 0.592
episode: 4196000 win_rate: 0.5680000000000001
episode: 4197000 win_rate: 0.601
episode: 4198000 win_rate: 0.61
episode: 4199000 win_rate: 0.562
episode: 4200000 win_rate: 0.594
episode: 4201000 win_rate: 0.579
episode: 4202000 win_rate: 0.593
episode: 4203000 win_rate: 0.614
episode: 4204000 win_rate: 0.585
episode: 4205000 win_rate: 0.592
episode: 4206000 win_rate: 0.583
episode: 4207000 win_rate: 0.576
episode: 4208000 win_rate: 0.619
episode: 4209000 win_rate: 0.592
episode: 4210000 win_rate: 0.61
episode: 4211000 win_rate: 0.625
episode: 4212000 win_rate: 0.588
episode: 4213000 win_rate: 0.581
episode: 4214000 win_rate: 0.601
episode: 4215000 win_rate: 0.619
episode: 4216000 win_rate: 0.588
episode: 4217000 win_rate: 0.619
episode: 4218000 win_rate: 0.579
episode: 4219000 win_rate: 0.588
episode: 4220000 win_rate: 0.605
episode: 4221000 win_rate: 0.5589999999999999
episode: 4222000 wi

episode: 4440000 win_rate: 0.59
episode: 4441000 win_rate: 0.612
episode: 4442000 win_rate: 0.595
episode: 4443000 win_rate: 0.613
episode: 4444000 win_rate: 0.5640000000000001
episode: 4445000 win_rate: 0.578
episode: 4446000 win_rate: 0.591
episode: 4447000 win_rate: 0.588
episode: 4448000 win_rate: 0.579
episode: 4449000 win_rate: 0.597
episode: 4450000 win_rate: 0.621
episode: 4451000 win_rate: 0.582
episode: 4452000 win_rate: 0.583
episode: 4453000 win_rate: 0.604
episode: 4454000 win_rate: 0.609
episode: 4455000 win_rate: 0.584
episode: 4456000 win_rate: 0.592
episode: 4457000 win_rate: 0.594
episode: 4458000 win_rate: 0.597
episode: 4459000 win_rate: 0.573
episode: 4460000 win_rate: 0.613
episode: 4461000 win_rate: 0.55
episode: 4462000 win_rate: 0.584
episode: 4463000 win_rate: 0.573
episode: 4464000 win_rate: 0.577
episode: 4465000 win_rate: 0.604
episode: 4466000 win_rate: 0.595
episode: 4467000 win_rate: 0.619
episode: 4468000 win_rate: 0.591
episode: 4469000 win_rate: 0.583

episode: 4688000 win_rate: 0.578
episode: 4689000 win_rate: 0.581
episode: 4690000 win_rate: 0.608
episode: 4691000 win_rate: 0.63
episode: 4692000 win_rate: 0.59
episode: 4693000 win_rate: 0.595
episode: 4694000 win_rate: 0.588
episode: 4695000 win_rate: 0.616
episode: 4696000 win_rate: 0.616
episode: 4697000 win_rate: 0.603
episode: 4698000 win_rate: 0.603
episode: 4699000 win_rate: 0.581
episode: 4700000 win_rate: 0.611
episode: 4701000 win_rate: 0.609
episode: 4702000 win_rate: 0.609
episode: 4703000 win_rate: 0.601
episode: 4704000 win_rate: 0.572
episode: 4705000 win_rate: 0.608
episode: 4706000 win_rate: 0.61
episode: 4707000 win_rate: 0.565
episode: 4708000 win_rate: 0.62
episode: 4709000 win_rate: 0.594
episode: 4710000 win_rate: 0.598
episode: 4711000 win_rate: 0.59
episode: 4712000 win_rate: 0.597
episode: 4713000 win_rate: 0.589
episode: 4714000 win_rate: 0.613
episode: 4715000 win_rate: 0.579
episode: 4716000 win_rate: 0.598
episode: 4717000 win_rate: 0.591
episode: 471800

episode: 4935000 win_rate: 0.582
episode: 4936000 win_rate: 0.598
episode: 4937000 win_rate: 0.601
episode: 4938000 win_rate: 0.592
episode: 4939000 win_rate: 0.582
episode: 4940000 win_rate: 0.61
episode: 4941000 win_rate: 0.594
episode: 4942000 win_rate: 0.613
episode: 4943000 win_rate: 0.582
episode: 4944000 win_rate: 0.591
episode: 4945000 win_rate: 0.605
episode: 4946000 win_rate: 0.605
episode: 4947000 win_rate: 0.574
episode: 4948000 win_rate: 0.611
episode: 4949000 win_rate: 0.585
episode: 4950000 win_rate: 0.5680000000000001
episode: 4951000 win_rate: 0.609
episode: 4952000 win_rate: 0.606
episode: 4953000 win_rate: 0.585
episode: 4954000 win_rate: 0.573
episode: 4955000 win_rate: 0.597
episode: 4956000 win_rate: 0.586
episode: 4957000 win_rate: 0.618
episode: 4958000 win_rate: 0.548
episode: 4959000 win_rate: 0.615
episode: 4960000 win_rate: 0.608
episode: 4961000 win_rate: 0.585
episode: 4962000 win_rate: 0.597
episode: 4963000 win_rate: 0.606
episode: 4964000 win_rate: 0.59

episode: 5181000 win_rate: 0.611
episode: 5182000 win_rate: 0.607
episode: 5183000 win_rate: 0.613
episode: 5184000 win_rate: 0.567
episode: 5185000 win_rate: 0.604
episode: 5186000 win_rate: 0.611
episode: 5187000 win_rate: 0.604
episode: 5188000 win_rate: 0.599
episode: 5189000 win_rate: 0.634
episode: 5190000 win_rate: 0.575
episode: 5191000 win_rate: 0.61
episode: 5192000 win_rate: 0.576
episode: 5193000 win_rate: 0.581
episode: 5194000 win_rate: 0.609
episode: 5195000 win_rate: 0.589
episode: 5196000 win_rate: 0.597
episode: 5197000 win_rate: 0.597
episode: 5198000 win_rate: 0.593
episode: 5199000 win_rate: 0.606
episode: 5200000 win_rate: 0.575
episode: 5201000 win_rate: 0.628
episode: 5202000 win_rate: 0.577
episode: 5203000 win_rate: 0.577
episode: 5204000 win_rate: 0.585
episode: 5205000 win_rate: 0.581
episode: 5206000 win_rate: 0.616
episode: 5207000 win_rate: 0.584
episode: 5208000 win_rate: 0.598
episode: 5209000 win_rate: 0.617
episode: 5210000 win_rate: 0.614
episode: 52

episode: 5428000 win_rate: 0.594
episode: 5429000 win_rate: 0.616
episode: 5430000 win_rate: 0.583
episode: 5431000 win_rate: 0.599
episode: 5432000 win_rate: 0.616
episode: 5433000 win_rate: 0.602
episode: 5434000 win_rate: 0.611
episode: 5435000 win_rate: 0.595
episode: 5436000 win_rate: 0.594
episode: 5437000 win_rate: 0.58
episode: 5438000 win_rate: 0.607
episode: 5439000 win_rate: 0.588
episode: 5440000 win_rate: 0.5660000000000001
episode: 5441000 win_rate: 0.591
episode: 5442000 win_rate: 0.584
episode: 5443000 win_rate: 0.596
episode: 5444000 win_rate: 0.595
episode: 5445000 win_rate: 0.591
episode: 5446000 win_rate: 0.581
episode: 5447000 win_rate: 0.616
episode: 5448000 win_rate: 0.583
episode: 5449000 win_rate: 0.613
episode: 5450000 win_rate: 0.602
episode: 5451000 win_rate: 0.576
episode: 5452000 win_rate: 0.596
episode: 5453000 win_rate: 0.61
episode: 5454000 win_rate: 0.599
episode: 5455000 win_rate: 0.6
episode: 5456000 win_rate: 0.577
episode: 5457000 win_rate: 0.584
e

episode: 5676000 win_rate: 0.612
episode: 5677000 win_rate: 0.595
episode: 5678000 win_rate: 0.582
episode: 5679000 win_rate: 0.577
episode: 5680000 win_rate: 0.587
episode: 5681000 win_rate: 0.592
episode: 5682000 win_rate: 0.585
episode: 5683000 win_rate: 0.61
episode: 5684000 win_rate: 0.606
episode: 5685000 win_rate: 0.5609999999999999
episode: 5686000 win_rate: 0.596
episode: 5687000 win_rate: 0.605
episode: 5688000 win_rate: 0.569
episode: 5689000 win_rate: 0.619
episode: 5690000 win_rate: 0.591
episode: 5691000 win_rate: 0.619
episode: 5692000 win_rate: 0.597
episode: 5693000 win_rate: 0.584
episode: 5694000 win_rate: 0.628
episode: 5695000 win_rate: 0.588
episode: 5696000 win_rate: 0.602
episode: 5697000 win_rate: 0.602
episode: 5698000 win_rate: 0.585
episode: 5699000 win_rate: 0.585
episode: 5700000 win_rate: 0.608
episode: 5701000 win_rate: 0.588
episode: 5702000 win_rate: 0.582
episode: 5703000 win_rate: 0.615
episode: 5704000 win_rate: 0.583
episode: 5705000 win_rate: 0.56

episode: 5922000 win_rate: 0.581
episode: 5923000 win_rate: 0.585
episode: 5924000 win_rate: 0.594
episode: 5925000 win_rate: 0.607
episode: 5926000 win_rate: 0.593
episode: 5927000 win_rate: 0.588
episode: 5928000 win_rate: 0.58
episode: 5929000 win_rate: 0.605
episode: 5930000 win_rate: 0.574
episode: 5931000 win_rate: 0.605
episode: 5932000 win_rate: 0.565
episode: 5933000 win_rate: 0.606
episode: 5934000 win_rate: 0.627
episode: 5935000 win_rate: 0.605
episode: 5936000 win_rate: 0.616
episode: 5937000 win_rate: 0.599
episode: 5938000 win_rate: 0.588
episode: 5939000 win_rate: 0.593
episode: 5940000 win_rate: 0.578
episode: 5941000 win_rate: 0.597
episode: 5942000 win_rate: 0.596
episode: 5943000 win_rate: 0.569
episode: 5944000 win_rate: 0.598
episode: 5945000 win_rate: 0.577
episode: 5946000 win_rate: 0.597
episode: 5947000 win_rate: 0.596
episode: 5948000 win_rate: 0.575
episode: 5949000 win_rate: 0.58
episode: 5950000 win_rate: 0.576
episode: 5951000 win_rate: 0.546
episode: 595