In [1]:
from __future__ import division,print_function
import collections
import numpy as np
import pygame
import random
import os
from time import sleep

In [2]:
class MyWrappedGame(object):
    """Single-player "catch the falling ball" game advanced one frame at a time.

    The class exposes a small reinforcement-learning style interface:
    ``reset()`` starts a new game and ``step(action)`` advances it by one
    frame, returning the most recent rendered frames, the reward earned on
    that frame and a game-over flag.
    """

    def __init__(self):
        """Initialise pygame and define the game's geometry, speeds and font."""
        # Uncomment to render without opening a window (headless SDL driver).
        #os.environ["SDL_VIDEODRIVER"] = "dummy"
        pygame.init()
        self.COLOR_WHITE = (255,255,255)
        self.COLOR_BLACK = (0,0,0)
        self.GAME_WIDTH = 400
        self.GAME_HEIGHT = 400
        self.BALL_WIDTH = 20
        self.BALL_HEIGHT = 20
        self.PADDLE_WIDTH = 50
        self.PADDLE_HEIGHT = 10
        self.GAME_FLOOR = 350
        self.GAME_CEILING = 10
        self.BALL_VELOCITY = 10
        self.PADDLE_VELOCITY = 20
        self.FONT_SIZE = 30
        self.MAX_TRIES_PER_GAME = 1
        self.CUSTOM_EVENT = pygame.USEREVENT + 1
        self.font = pygame.font.SysFont("Comic Sans MS",self.FONT_SIZE)

    def _spawn_ball(self):
        """Place a new ball at the ceiling at a random, fully visible x position."""
        # random.randint is inclusive on both ends, so the upper bound must
        # leave room for the ball's own width or it can spawn off-screen
        # (and out of the paddle's reach).
        self.ball_x = random.randint(0, self.GAME_WIDTH - self.BALL_WIDTH)
        self.ball_y = self.GAME_CEILING

    def reset(self):
        """Start a new game: clear frames/score, centre the paddle, drop a new ball.

        Also (re)creates the display surface and the frame-rate clock.
        """
        # Only the four most recent frames are kept.
        self.frames = collections.deque(maxlen=4)
        self.game_over = False
        self.paddle_x = self.GAME_WIDTH // 2
        self.game_score = 0
        self.reward = 0
        self._spawn_ball()
        self.num_tries = 0
        self.screen = pygame.display.set_mode((self.GAME_WIDTH,self.GAME_HEIGHT))
        self.clock = pygame.time.Clock()

    def step(self,action):
        """Advance the game by one frame.

        Args:
            action: 0 moves the paddle left, 2 moves it right; any other
                value leaves the paddle where it is.

        Returns:
            A tuple ``(frames, reward, game_over)`` where ``frames`` is a
            numpy array built from the (up to four) most recent screen
            captures, ``reward`` is 1 when the ball was caught on this
            frame, -1 when it was missed and 0 otherwise, and ``game_over``
            is True once ``MAX_TRIES_PER_GAME`` balls have been played.
        """
        pygame.event.pump()

        # Move the paddle and clamp it to the playfield so that pushing
        # against a wall keeps it at the wall instead of bouncing it back.
        max_paddle_x = self.GAME_WIDTH - self.PADDLE_WIDTH
        if action == 0:
            self.paddle_x = max(0, self.paddle_x - self.PADDLE_VELOCITY)
        elif action == 2:
            self.paddle_x = min(max_paddle_x, self.paddle_x + self.PADDLE_VELOCITY)

        # Draw background and score line.
        self.screen.fill(self.COLOR_BLACK)
        score_text = self.font.render("Score:{:d}/{:d},Ball:{:d}".format(self.game_score,self.MAX_TRIES_PER_GAME,self.num_tries),True,self.COLOR_WHITE)
        text_x = (self.GAME_WIDTH - score_text.get_width()) // 2
        text_y = self.GAME_FLOOR + self.FONT_SIZE // 2
        self.screen.blit(score_text,(text_x,text_y))

        # Advance and draw the ball, then draw the paddle.
        self.ball_y += self.BALL_VELOCITY
        ball = pygame.draw.rect(self.screen, self.COLOR_WHITE,pygame.Rect(self.ball_x,self.ball_y,self.BALL_WIDTH,self.BALL_HEIGHT))
        paddle = pygame.draw.rect(self.screen, self.COLOR_WHITE,pygame.Rect(self.paddle_x,self.GAME_FLOOR,self.PADDLE_WIDTH,self.PADDLE_HEIGHT))

        # Once the ball reaches paddle level the try is decided: caught or missed.
        # The threshold is a vertical distance, hence BALL_HEIGHT.
        self.reward = 0
        if self.ball_y >= self.GAME_FLOOR - self.BALL_HEIGHT // 2:
            self.reward = 1 if ball.colliderect(paddle) else -1
            self.game_score += self.reward
            self._spawn_ball()
            self.num_tries += 1

        pygame.display.flip()

        # Capture the rendered screen as a 2-D pixel array.
        self.frames.append(pygame.surfarray.array2d(self.screen))

        if self.num_tries >= self.MAX_TRIES_PER_GAME:
            self.game_over = True

        # Cap the loop at 30 frames per second.
        self.clock.tick(30)
        return np.array(list(self.frames)),self.reward, self.game_over
    

In [3]:
from keras.models import Sequential
from keras.layers.core import Activation,Dense,Flatten
from keras.layers.convolutional import Conv2D
from keras.optimizers import Adam
from scipy.misc.pilutil import imresize 
from keras.models import load_model

Using TensorFlow backend.


In [4]:
def preprocess_images(images):
    """Turn a stack of raw game frames into one network input tensor.

    Every frame is resized to 80x80 with ``imresize`` and divided by 255.
    When fewer than four frames are supplied, the first frame is repeated
    four times; otherwise the first four resized frames are used. The
    frames are stacked along the last axis and a leading batch axis is
    added, giving an array of shape (1, 80, 80, 4).
    """
    def _shrink(frame):
        # Resize, then convert to float and scale by 1/255.
        return imresize(frame,(80,80)).astype("float") / 255.0

    frame_count = images.shape[0]
    if frame_count < 4:
        # Not enough history yet: replicate the first frame into all channels.
        first = _shrink(images[0])
        channels = [first, first, first, first]
    else:
        resized = [_shrink(images[idx]) for idx in range(frame_count)]
        channels = resized[:4]
    stacked = np.stack(channels,axis=2)
    return np.expand_dims(stacked,axis=0)

def get_next_batch(experince,model,num_actions,gamma,batch_size):
    """Sample a training batch from the replay buffer and build Q-learning targets.

    Args:
        experince: indexable replay buffer of
            ``(s_t, a_t, r_t, s_tp1, game_over)`` tuples, where each state
            fits one row of an (80, 80, 4) input slot.
        model: object with a ``predict(batch)`` method returning one row of
            ``num_actions`` Q-values per input state.
        num_actions: number of discrete actions (width of the target array).
        gamma: discount factor applied to the best next-state Q-value.
        batch_size: number of transitions to sample (with replacement).

    Returns:
        ``(X, Y)``: ``X`` has shape (batch_size, 80, 80, 4) and holds the
        sampled states; ``Y`` has shape (batch_size, num_actions) and holds
        the model's current Q-values with the entry of the taken action
        replaced by ``r_t`` (terminal) or ``r_t + gamma * max Q(s_tp1)``.

    Raises:
        ValueError: if a non-empty batch is requested from an empty buffer.
    """
    X = np.zeros((batch_size,80,80,4))
    Y = np.zeros((batch_size,num_actions))
    if batch_size == 0:
        return X,Y
    if len(experince) == 0:
        raise ValueError("cannot sample a batch from an empty experience buffer")

    batch_indices = np.random.randint(low=0,high=len(experince),size=batch_size)
    batch = [experince[i] for i in batch_indices]

    next_states = np.zeros_like(X)
    for i, transition in enumerate(batch):
        X[i] = transition[0]
        next_states[i] = transition[3]

    # Two batched forward passes instead of two single-sample passes per
    # transition: same values, far less per-call overhead.
    Y[:] = model.predict(X)
    best_next_q = np.max(model.predict(next_states),axis=1)

    for i, (_, a_t, r_t, _, game_over) in enumerate(batch):
        if game_over:
            # Terminal transition: there is no future return to bootstrap from.
            Y[i,a_t] = r_t
        else:
            Y[i,a_t] = r_t + gamma * best_next_q[i]
    return X,Y

In [5]:
# Q-network: three strided convolutions over the (80, 80, 4) frame stack,
# a 512-unit dense layer, and a linear output with one Q-value per action.
model = Sequential([
    Conv2D(32, kernel_size=8, strides=4,kernel_initializer="normal",padding="same",input_shape=(80,80,4)),
    Activation("relu"),
    Conv2D(64, kernel_size=4, strides=2,kernel_initializer="normal",padding="same"),
    Activation("relu"),
    Conv2D(64, kernel_size=3, strides=1,kernel_initializer="normal",padding="same"),
    Activation("relu"),
    Flatten(),
    Dense(512,kernel_initializer="normal"),
    Activation("relu"),
    Dense(3,kernel_initializer="normal"),
])
# Mean-squared error between predicted and target Q-values.
model.compile(loss="mse",optimizer=Adam(lr=1e-6))

In [6]:
# --- Agent / environment ---
NUM_ACTIONS = 3            # size of the discrete action space
GAMMA = 0.99               # discount factor for future rewards

# --- Exploration schedule (epsilon-greedy) ---
INITIAL_EPSILON = 0.1
FINAL_EPSILON = 1e-4

# --- Replay memory and batching ---
MEMORY_SIZE = 7000         # maximum number of stored transitions
BATCH_SIZE = 32

# --- Schedule: random-play observation epochs, then training epochs ---
NUM_EPOCHS_OBSERVE = 100
NUM_EPOCHS_TRAIN = 2000
NUM_EPOCHS = NUM_EPOCHS_TRAIN + NUM_EPOCHS_OBSERVE

# Replay buffer: a bounded deque, so the oldest transitions are discarded
# once MEMORY_SIZE entries are stored.
experince = collections.deque(maxlen=MEMORY_SIZE)
game = MyWrappedGame()

# Running counters and the current exploration rate.
num_games = 0
num_wins = 0
epsilon = INITIAL_EPSILON

In [14]:
# Train the Q-network: each epoch is one full game. Early epochs only collect
# experience with random actions; later epochs act epsilon-greedily and fit
# the network on a batch sampled from the replay buffer after every step.
for e in range(NUM_EPOCHS):
    game.reset()
    loss = 0.0
    # Take one "stay" step to obtain the first frame of the game.
    a_0 = 1
    x_t,r_0,game_over = game.step(a_0)
    s_t = preprocess_images(x_t)
    # Ignore the flag from the warm-up step so the game loop runs at least once.
    game_over = False
    while not game_over:
        s_tm1 = s_t
        # Choose the next action.
        if e <= NUM_EPOCHS_OBSERVE:
            # Observation phase: act randomly to fill the replay buffer.
            a_t = np.random.randint(low=0,high=NUM_ACTIONS,size=1)[0]
        else:
            if np.random.rand() <= epsilon:
                # Explore.
                a_t = np.random.randint(low=0,high=NUM_ACTIONS,size=1)[0]
            else:
                # Exploit: take the action with the highest predicted Q-value.
                q = model.predict(s_t)[0]
                a_t = np.argmax(q)
        # Apply the action to the game.
        x_t, r_t, game_over = game.step(a_t)
        s_t = preprocess_images(x_t)
        # A positive reward means the ball was caught: count it as a win.
        if r_t == 1:
            num_wins += 1
        # Store the transition in the replay buffer.
        experince.append((s_tm1, a_t, r_t, s_t, game_over))
        if e > NUM_EPOCHS_OBSERVE:
            # Training phase: fit on a batch sampled from the replay buffer.
            X,Y = get_next_batch(experince,model,NUM_ACTIONS,GAMMA,BATCH_SIZE)
            loss += model.train_on_batch(X,Y)
    # Anneal epsilon once per epoch. The decrement is sized as
    # (INITIAL - FINAL) / NUM_EPOCHS, so it must be applied per epoch, not
    # per step, to stretch exploration over the whole run; clamp at the floor.
    if epsilon > FINAL_EPSILON:
        epsilon = max(FINAL_EPSILON, epsilon - (INITIAL_EPSILON - FINAL_EPSILON) / NUM_EPOCHS)
    print("Epoch {:04d}/{:d} | Loss {:.5f} | Win Count {:d}".format(e + 1,NUM_EPOCHS,loss,num_wins))

Epoch 0001/2100 | Loss 0.00000 | Win Count 0
Epoch 0002/2100 | Loss 0.00000 | Win Count 0
Epoch 0003/2100 | Loss 0.00000 | Win Count 0
Epoch 0004/2100 | Loss 0.00000 | Win Count 0
Epoch 0005/2100 | Loss 0.00000 | Win Count 1
Epoch 0006/2100 | Loss 0.00000 | Win Count 1
Epoch 0007/2100 | Loss 0.00000 | Win Count 1
Epoch 0008/2100 | Loss 0.00000 | Win Count 1
Epoch 0009/2100 | Loss 0.00000 | Win Count 1
Epoch 0010/2100 | Loss 0.00000 | Win Count 1
Epoch 0011/2100 | Loss 0.00000 | Win Count 2
Epoch 0012/2100 | Loss 0.00000 | Win Count 2
Epoch 0013/2100 | Loss 0.00000 | Win Count 2
Epoch 0014/2100 | Loss 0.00000 | Win Count 2
Epoch 0015/2100 | Loss 0.00000 | Win Count 2
Epoch 0016/2100 | Loss 0.00000 | Win Count 3
Epoch 0017/2100 | Loss 0.00000 | Win Count 3
Epoch 0018/2100 | Loss 0.00000 | Win Count 4
Epoch 0019/2100 | Loss 0.00000 | Win Count 4
Epoch 0020/2100 | Loss 0.00000 | Win Count 4
Epoch 0021/2100 | Loss 0.00000 | Win Count 4
Epoch 0022/2100 | Loss 0.00000 | Win Count 4
Epoch 0023

Epoch 0181/2100 | Loss 0.63587 | Win Count 48
Epoch 0182/2100 | Loss 0.64260 | Win Count 49
Epoch 0183/2100 | Loss 0.47501 | Win Count 50
Epoch 0184/2100 | Loss 0.41015 | Win Count 51
Epoch 0185/2100 | Loss 0.44626 | Win Count 51
Epoch 0186/2100 | Loss 0.54461 | Win Count 52
Epoch 0187/2100 | Loss 0.52279 | Win Count 52
Epoch 0188/2100 | Loss 0.51399 | Win Count 52
Epoch 0189/2100 | Loss 0.42597 | Win Count 53
Epoch 0190/2100 | Loss 0.45327 | Win Count 53
Epoch 0191/2100 | Loss 0.55181 | Win Count 53
Epoch 0192/2100 | Loss 0.51915 | Win Count 54
Epoch 0193/2100 | Loss 0.43856 | Win Count 54
Epoch 0194/2100 | Loss 0.42911 | Win Count 55
Epoch 0195/2100 | Loss 0.43535 | Win Count 55
Epoch 0196/2100 | Loss 0.33726 | Win Count 56
Epoch 0197/2100 | Loss 0.45311 | Win Count 57
Epoch 0198/2100 | Loss 0.40242 | Win Count 58
Epoch 0199/2100 | Loss 0.43050 | Win Count 58
Epoch 0200/2100 | Loss 0.44102 | Win Count 58
Epoch 0201/2100 | Loss 0.39864 | Win Count 59
Epoch 0202/2100 | Loss 0.51764 | W

Epoch 0358/2100 | Loss 0.15295 | Win Count 139
Epoch 0359/2100 | Loss 0.10375 | Win Count 140
Epoch 0360/2100 | Loss 0.06858 | Win Count 140
Epoch 0361/2100 | Loss 0.16479 | Win Count 141
Epoch 0362/2100 | Loss 0.19521 | Win Count 142
Epoch 0363/2100 | Loss 0.09449 | Win Count 142
Epoch 0364/2100 | Loss 0.12417 | Win Count 142
Epoch 0365/2100 | Loss 0.14360 | Win Count 142
Epoch 0366/2100 | Loss 0.14775 | Win Count 142
Epoch 0367/2100 | Loss 0.14241 | Win Count 143
Epoch 0368/2100 | Loss 0.12344 | Win Count 143
Epoch 0369/2100 | Loss 0.11763 | Win Count 144
Epoch 0370/2100 | Loss 0.19050 | Win Count 144
Epoch 0371/2100 | Loss 0.11003 | Win Count 145
Epoch 0372/2100 | Loss 0.13802 | Win Count 145
Epoch 0373/2100 | Loss 0.10694 | Win Count 146
Epoch 0374/2100 | Loss 0.11087 | Win Count 146
Epoch 0375/2100 | Loss 0.10626 | Win Count 146
Epoch 0376/2100 | Loss 0.10140 | Win Count 146
Epoch 0377/2100 | Loss 0.13725 | Win Count 147
Epoch 0378/2100 | Loss 0.09000 | Win Count 148
Epoch 0379/21

Epoch 0533/2100 | Loss 0.14771 | Win Count 245
Epoch 0534/2100 | Loss 0.17713 | Win Count 245
Epoch 0535/2100 | Loss 0.16569 | Win Count 245
Epoch 0536/2100 | Loss 0.10324 | Win Count 246
Epoch 0537/2100 | Loss 0.04509 | Win Count 246
Epoch 0538/2100 | Loss 0.19515 | Win Count 246
Epoch 0539/2100 | Loss 0.13672 | Win Count 247
Epoch 0540/2100 | Loss 0.12824 | Win Count 247
Epoch 0541/2100 | Loss 0.09893 | Win Count 248
Epoch 0542/2100 | Loss 0.10611 | Win Count 249
Epoch 0543/2100 | Loss 0.07411 | Win Count 250
Epoch 0544/2100 | Loss 0.08267 | Win Count 250
Epoch 0545/2100 | Loss 0.15411 | Win Count 251
Epoch 0546/2100 | Loss 0.16023 | Win Count 251
Epoch 0547/2100 | Loss 0.10988 | Win Count 251
Epoch 0548/2100 | Loss 0.16564 | Win Count 251
Epoch 0549/2100 | Loss 0.07261 | Win Count 252
Epoch 0550/2100 | Loss 0.10414 | Win Count 253
Epoch 0551/2100 | Loss 0.10663 | Win Count 254
Epoch 0552/2100 | Loss 0.09496 | Win Count 255
Epoch 0553/2100 | Loss 0.21392 | Win Count 256
Epoch 0554/21

Epoch 0708/2100 | Loss 0.12967 | Win Count 340
Epoch 0709/2100 | Loss 0.11228 | Win Count 341
Epoch 0710/2100 | Loss 0.13750 | Win Count 341
Epoch 0711/2100 | Loss 0.08852 | Win Count 342
Epoch 0712/2100 | Loss 0.09032 | Win Count 342
Epoch 0713/2100 | Loss 0.08022 | Win Count 343
Epoch 0714/2100 | Loss 0.10564 | Win Count 343
Epoch 0715/2100 | Loss 0.09311 | Win Count 344
Epoch 0716/2100 | Loss 0.09283 | Win Count 345
Epoch 0717/2100 | Loss 0.05874 | Win Count 346
Epoch 0718/2100 | Loss 0.06874 | Win Count 346
Epoch 0719/2100 | Loss 0.08891 | Win Count 347
Epoch 0720/2100 | Loss 0.08614 | Win Count 348
Epoch 0721/2100 | Loss 0.07234 | Win Count 348
Epoch 0722/2100 | Loss 0.07369 | Win Count 349
Epoch 0723/2100 | Loss 0.07161 | Win Count 350
Epoch 0724/2100 | Loss 0.06918 | Win Count 351
Epoch 0725/2100 | Loss 0.10394 | Win Count 352
Epoch 0726/2100 | Loss 0.10530 | Win Count 353
Epoch 0727/2100 | Loss 0.11526 | Win Count 354
Epoch 0728/2100 | Loss 0.09315 | Win Count 354
Epoch 0729/21

Epoch 0883/2100 | Loss 0.08516 | Win Count 455
Epoch 0884/2100 | Loss 0.03955 | Win Count 456
Epoch 0885/2100 | Loss 0.08265 | Win Count 456
Epoch 0886/2100 | Loss 0.04133 | Win Count 457
Epoch 0887/2100 | Loss 0.06333 | Win Count 457
Epoch 0888/2100 | Loss 0.04703 | Win Count 458
Epoch 0889/2100 | Loss 0.05355 | Win Count 458
Epoch 0890/2100 | Loss 0.03974 | Win Count 459
Epoch 0891/2100 | Loss 0.08331 | Win Count 460
Epoch 0892/2100 | Loss 0.04998 | Win Count 460
Epoch 0893/2100 | Loss 0.04186 | Win Count 460
Epoch 0894/2100 | Loss 0.06350 | Win Count 461
Epoch 0895/2100 | Loss 0.07575 | Win Count 461
Epoch 0896/2100 | Loss 0.02956 | Win Count 462
Epoch 0897/2100 | Loss 0.03392 | Win Count 463
Epoch 0898/2100 | Loss 0.07536 | Win Count 463
Epoch 0899/2100 | Loss 0.03347 | Win Count 463
Epoch 0900/2100 | Loss 0.05410 | Win Count 464
Epoch 0901/2100 | Loss 0.03590 | Win Count 464
Epoch 0902/2100 | Loss 0.04514 | Win Count 465
Epoch 0903/2100 | Loss 0.03098 | Win Count 466
Epoch 0904/21

Epoch 1058/2100 | Loss 0.09344 | Win Count 577
Epoch 1059/2100 | Loss 0.03120 | Win Count 577
Epoch 1060/2100 | Loss 0.04920 | Win Count 578
Epoch 1061/2100 | Loss 0.02983 | Win Count 579
Epoch 1062/2100 | Loss 0.03591 | Win Count 580
Epoch 1063/2100 | Loss 0.02919 | Win Count 581
Epoch 1064/2100 | Loss 0.08470 | Win Count 582
Epoch 1065/2100 | Loss 0.02928 | Win Count 583
Epoch 1066/2100 | Loss 0.02256 | Win Count 583
Epoch 1067/2100 | Loss 0.04625 | Win Count 584
Epoch 1068/2100 | Loss 0.10910 | Win Count 585
Epoch 1069/2100 | Loss 0.02343 | Win Count 585
Epoch 1070/2100 | Loss 0.03660 | Win Count 586
Epoch 1071/2100 | Loss 0.03859 | Win Count 586
Epoch 1072/2100 | Loss 0.03993 | Win Count 587
Epoch 1073/2100 | Loss 0.02331 | Win Count 588
Epoch 1074/2100 | Loss 0.05760 | Win Count 589
Epoch 1075/2100 | Loss 0.05830 | Win Count 590
Epoch 1076/2100 | Loss 0.07131 | Win Count 591
Epoch 1077/2100 | Loss 0.05194 | Win Count 592
Epoch 1078/2100 | Loss 0.03725 | Win Count 593
Epoch 1079/21

Epoch 1233/2100 | Loss 0.05569 | Win Count 703
Epoch 1234/2100 | Loss 0.02239 | Win Count 704
Epoch 1235/2100 | Loss 0.05020 | Win Count 705
Epoch 1236/2100 | Loss 0.03103 | Win Count 706
Epoch 1237/2100 | Loss 0.07209 | Win Count 707
Epoch 1238/2100 | Loss 0.04457 | Win Count 708
Epoch 1239/2100 | Loss 0.03259 | Win Count 708
Epoch 1240/2100 | Loss 0.05149 | Win Count 709
Epoch 1241/2100 | Loss 0.06078 | Win Count 709
Epoch 1242/2100 | Loss 0.06826 | Win Count 709
Epoch 1243/2100 | Loss 0.03498 | Win Count 709
Epoch 1244/2100 | Loss 0.02159 | Win Count 710
Epoch 1245/2100 | Loss 0.06840 | Win Count 711
Epoch 1246/2100 | Loss 0.07110 | Win Count 712
Epoch 1247/2100 | Loss 0.09538 | Win Count 712
Epoch 1248/2100 | Loss 0.05680 | Win Count 713
Epoch 1249/2100 | Loss 0.02015 | Win Count 714
Epoch 1250/2100 | Loss 0.04377 | Win Count 715
Epoch 1251/2100 | Loss 0.03227 | Win Count 716
Epoch 1252/2100 | Loss 0.05397 | Win Count 717
Epoch 1253/2100 | Loss 0.04368 | Win Count 718
Epoch 1254/21

Epoch 1408/2100 | Loss 0.03108 | Win Count 834
Epoch 1409/2100 | Loss 0.03595 | Win Count 835
Epoch 1410/2100 | Loss 0.02760 | Win Count 836
Epoch 1411/2100 | Loss 0.06803 | Win Count 837
Epoch 1412/2100 | Loss 0.06646 | Win Count 838
Epoch 1413/2100 | Loss 0.08874 | Win Count 838
Epoch 1414/2100 | Loss 0.06925 | Win Count 839
Epoch 1415/2100 | Loss 0.05018 | Win Count 840
Epoch 1416/2100 | Loss 0.04580 | Win Count 841
Epoch 1417/2100 | Loss 0.03797 | Win Count 842
Epoch 1418/2100 | Loss 0.08669 | Win Count 843
Epoch 1419/2100 | Loss 0.05911 | Win Count 844
Epoch 1420/2100 | Loss 0.01862 | Win Count 845
Epoch 1421/2100 | Loss 0.11065 | Win Count 846
Epoch 1422/2100 | Loss 0.04807 | Win Count 846
Epoch 1423/2100 | Loss 0.03643 | Win Count 847
Epoch 1424/2100 | Loss 0.05678 | Win Count 847
Epoch 1425/2100 | Loss 0.03350 | Win Count 847
Epoch 1426/2100 | Loss 0.03525 | Win Count 848
Epoch 1427/2100 | Loss 0.04806 | Win Count 849
Epoch 1428/2100 | Loss 0.05001 | Win Count 850
Epoch 1429/21

Epoch 1583/2100 | Loss 0.04973 | Win Count 954
Epoch 1584/2100 | Loss 0.02447 | Win Count 954
Epoch 1585/2100 | Loss 0.05956 | Win Count 954
Epoch 1586/2100 | Loss 0.11174 | Win Count 955
Epoch 1587/2100 | Loss 0.03545 | Win Count 956
Epoch 1588/2100 | Loss 0.04165 | Win Count 957
Epoch 1589/2100 | Loss 0.08988 | Win Count 958
Epoch 1590/2100 | Loss 0.07324 | Win Count 959
Epoch 1591/2100 | Loss 0.04199 | Win Count 960
Epoch 1592/2100 | Loss 0.04651 | Win Count 961
Epoch 1593/2100 | Loss 0.04698 | Win Count 962
Epoch 1594/2100 | Loss 0.04100 | Win Count 963
Epoch 1595/2100 | Loss 0.05063 | Win Count 964
Epoch 1596/2100 | Loss 0.04609 | Win Count 964
Epoch 1597/2100 | Loss 0.03171 | Win Count 965
Epoch 1598/2100 | Loss 0.04362 | Win Count 965
Epoch 1599/2100 | Loss 0.02719 | Win Count 965
Epoch 1600/2100 | Loss 0.05159 | Win Count 966
Epoch 1601/2100 | Loss 0.05389 | Win Count 967
Epoch 1602/2100 | Loss 0.04113 | Win Count 967
Epoch 1603/2100 | Loss 0.04914 | Win Count 967
Epoch 1604/21

Epoch 1756/2100 | Loss 0.05097 | Win Count 1093
Epoch 1757/2100 | Loss 0.07627 | Win Count 1094
Epoch 1758/2100 | Loss 0.05172 | Win Count 1095
Epoch 1759/2100 | Loss 0.02515 | Win Count 1096
Epoch 1760/2100 | Loss 0.05616 | Win Count 1096
Epoch 1761/2100 | Loss 0.03090 | Win Count 1097
Epoch 1762/2100 | Loss 0.02654 | Win Count 1098
Epoch 1763/2100 | Loss 0.06619 | Win Count 1098
Epoch 1764/2100 | Loss 0.04445 | Win Count 1099
Epoch 1765/2100 | Loss 0.05480 | Win Count 1100
Epoch 1766/2100 | Loss 0.02184 | Win Count 1100
Epoch 1767/2100 | Loss 0.02289 | Win Count 1101
Epoch 1768/2100 | Loss 0.03225 | Win Count 1102
Epoch 1769/2100 | Loss 0.02428 | Win Count 1103
Epoch 1770/2100 | Loss 0.02717 | Win Count 1104
Epoch 1771/2100 | Loss 0.01518 | Win Count 1105
Epoch 1772/2100 | Loss 0.07195 | Win Count 1106
Epoch 1773/2100 | Loss 0.01793 | Win Count 1107
Epoch 1774/2100 | Loss 0.02827 | Win Count 1108
Epoch 1775/2100 | Loss 0.01896 | Win Count 1109
Epoch 1776/2100 | Loss 0.02373 | Win Cou

Epoch 1927/2100 | Loss 0.07919 | Win Count 1225
Epoch 1928/2100 | Loss 0.07250 | Win Count 1226
Epoch 1929/2100 | Loss 0.09914 | Win Count 1227
Epoch 1930/2100 | Loss 0.06642 | Win Count 1228
Epoch 1931/2100 | Loss 0.02749 | Win Count 1229
Epoch 1932/2100 | Loss 0.04818 | Win Count 1230
Epoch 1933/2100 | Loss 0.06673 | Win Count 1231
Epoch 1934/2100 | Loss 0.03706 | Win Count 1231
Epoch 1935/2100 | Loss 0.08797 | Win Count 1231
Epoch 1936/2100 | Loss 0.03830 | Win Count 1232
Epoch 1937/2100 | Loss 0.05316 | Win Count 1232
Epoch 1938/2100 | Loss 0.03343 | Win Count 1232
Epoch 1939/2100 | Loss 0.08175 | Win Count 1232
Epoch 1940/2100 | Loss 0.05189 | Win Count 1233
Epoch 1941/2100 | Loss 0.08098 | Win Count 1234
Epoch 1942/2100 | Loss 0.03732 | Win Count 1235
Epoch 1943/2100 | Loss 0.07957 | Win Count 1236
Epoch 1944/2100 | Loss 0.04666 | Win Count 1237
Epoch 1945/2100 | Loss 0.05734 | Win Count 1238
Epoch 1946/2100 | Loss 0.03683 | Win Count 1239
Epoch 1947/2100 | Loss 0.09644 | Win Cou

Epoch 2098/2100 | Loss 0.04609 | Win Count 1351
Epoch 2099/2100 | Loss 0.05945 | Win Count 1352
Epoch 2100/2100 | Loss 0.04926 | Win Count 1353


In [None]:
# Evaluate the network: play 100 games, always taking the action with the
# highest predicted Q-value, and count how many balls are caught.
num_games = 0
num_wins = 0
for i in range(100):
    game.reset()
    # Warm-up "stay" step to obtain the first frame.
    a_0 = 1
    x_t, r_0, game_over = game.step(a_0)
    s_t = preprocess_images(x_t)
    # The flag returned by the warm-up step is ignored: the loop body always
    # runs at least once and stops when a later step reports game over.
    while True:
        s_tml = s_t
        q = model.predict(s_t)[0]
        a_t = np.argmax(q)
        x_t, r_t, game_over = game.step(a_t)
        s_t = preprocess_images(x_t)
        num_wins += int(r_t == 1)
        if game_over:
            break
    num_games += 1
    print("Game: {:03d}, Wins: {:03d}".format(num_games,num_wins), end="\n")

Game: 001, Wins: 001
Game: 002, Wins: 002
Game: 003, Wins: 002
Game: 004, Wins: 003
Game: 005, Wins: 003
Game: 006, Wins: 003
Game: 007, Wins: 003
Game: 008, Wins: 003
Game: 009, Wins: 004
Game: 010, Wins: 004
Game: 011, Wins: 005
Game: 012, Wins: 006


In [7]:
# Load network weights from an HDF5 file in the working directory.
# NOTE(review): no cell visible in this notebook writes this file, and this
# cell's execution count (7) precedes the training cell's (14) — confirm where
# the weights come from and where this cell belongs in a top-to-bottom run.
model.load_weights("model_weight_tenis.HDF5")