In [9]:
import pygame
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import numpy as np

In [10]:
import tensorflow
tensorflow.keras.utils.disable_interactive_logging()

In [11]:
#borders
WIDTH = 300
HEIGHT = 300
FPS = 200
#colors
WHITE= (255,255,255)
BLACK= (0,0,0)
RED= (255,0,0)
GREEN= (0,255,0)
BLUE= (0,0,255)
max_speed=17

In [12]:
class Enemy(pygame.sprite.Sprite):
    def __init__(self):
        pygame.sprite.Sprite.__init__(self)  
        self.image= pygame.Surface((12,12))
        self.image.fill(BLACK)
        self.rect= self.image.get_rect()
        self.radius=9
        pygame.draw.circle(self.image, RED, self.rect.center, self.radius)
        self.maxspeed = 4

        self.rect.x = random.randrange(0,WIDTH - self.rect.width) #Our enemy spawn inside of the borders, but it has 20,20 area so some pixels might spawn at outside of the borders. We fix that here
        self.rect.y = random.randrange(2,6) # spawn area at y 

        self.speedx= 0
        self.speedy= 4

    def update(self):
        self.rect.x += self.speedx
        self.rect.y += self.maxspeed

        if self.rect.top > HEIGHT + 20:
            self.rect.x = random.randrange(0,WIDTH - self.rect.width)
            self.rect.y = random.randrange(2,6)
            
            if self.maxspeed < 12:
                self.maxspeed +=1

    def getCoordinates(self):
        return (self.rect.x, self.rect.y)

In [13]:
class Player(pygame.sprite.Sprite):

    def __init__(self):
        pygame.sprite.Sprite.__init__(self)
        self.image = pygame.Surface((15,15))
        self.image.fill(GREEN)
        self.rect = self.image.get_rect()
        self.radius= 12
        pygame.draw.circle(self.image, BLACK, self.rect.center, self.radius)

        self.rect.centerx = WIDTH/2
        self.rect.bottom= HEIGHT - 1
        self.speedx= 0 


    def update(self, action):
        self.speedx = 0
        keystate = pygame.key.get_pressed()

        if keystate[pygame.K_LEFT] or action == 0:
            self.speedx= -7

        elif keystate[pygame.K_RIGHT]or action== 1:
            self.speedx= 7

        else:
            self.speedx= 0
        
        self.rect.x += self.speedx  


        if self.rect.right > WIDTH:
            self.rect.right = WIDTH

        if self.rect.left < 0:
            self.rect.left = 0

    def getCoordinates(self):
        return (self.rect.x, self.rect.y)

In [14]:
class DQLAgent:
    def __init__(self):
        #parameters / hyperparameters for agent
        self.state_size= 8 # distance around between player x,y and enemy x,y
        self.action_size= 3 #right, left, stay

        self.gamma= 0.95
        self.learning_rate= 0.01

        self.epsilon= 0.8     #agent gonna explore for sure at the start of training
        self.epsilon_decay= 0.995   #after per step of iteration, explore ratio will go down###^212dadada
        self.epsilon_min= 0.01  #until 0.01

        self.memory= deque(maxlen=500) #there will be max 1000 training datas at the memory. after 1000, it will delete data starting from first.

        self.model= self.build_model()  #Agents DNN model
        
    def build_model(self):
        #neural network for dqlearning
        model = Sequential()
        model.add(Dense(48, input_dim= self.state_size, activation= "relu")) #hidden layer, theres 48 neurons,
        model.add(Dense(self.action_size, activation= "linear")) # output layer, neurons count is even with our action space number.
        model.compile(loss="mse", optimizer=Adam(learning_rate=self.learning_rate)) #Loss value and optimizer model

        return model


    def remember(self, state, action, reward, next_state, done):
        #storage
        self.memory.append((state, action, reward, next_state, done)) # added the necessary values to memory
    

    def act(self,state):
        #action
        state= np.array(state)
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values= self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        #train
        if len(self.memory) < batch_size:
            return
        minibatch= random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state= np.array(state)
            next_state= np.array(next_state)

            if done:
                target= reward
            else:
                target= reward + self.gamma*np.amax(self.model.predict(next_state)[0])

            train_target= self.model.predict(state)
            train_target[0][action]= target
            self.model.fit(state,train_target,verbose= 0)

    def adaptiveEGreedy(self):

        if self.epsilon > self.epsilon_min: # if epsilon is bigger than epsilon min, 
            self.epsilon *= self.epsilon_decay

                Enviroment Part
                

In [15]:
class Env(pygame.sprite.Sprite):
    
    def __init__(self):
        self.clock = pygame.time.Clock()
        self.clock.tick(FPS)
        pygame.sprite.Sprite.__init__(self)
        self.all_sprite= pygame.sprite.Group()  
        self.enemy_group= pygame.sprite.Group()
        self.player= Player()
        self.all_sprite.add(self.player)
        self.m1= Enemy()
        self.m2= Enemy()
        self.m3= Enemy()
        self.m4= Enemy()
        self.all_sprite.add(self.m1)
        self.all_sprite.add(self.m2)
        self.all_sprite.add(self.m3)
        self.all_sprite.add(self.m4)
        self.enemy_group.add(self.m1)
        self.enemy_group.add(self.m2)
        self.enemy_group.add(self.m3)
        self.enemy_group.add(self.m4)

        self.screen = pygame.display.set_mode((WIDTH, HEIGHT))

        self.reward = 0
        self.total_reward= 0
        self.done = False
        self.agent= DQLAgent()

    def findDistance(self,a, b):
        d= a-b
        return d
    
    def step(self, action):
        state_list = []

        #update
        self.player.update(action)
        self.enemy_group.update()

        # get coordinate
        next_player_state= self.player.getCoordinates()
        next_m1_state= self.m1.getCoordinates()
        next_m2_state= self.m2.getCoordinates()
        next_m3_state= self.m3.getCoordinates()
        next_m4_state= self.m4.getCoordinates()
        #find distance
        state_list.append(self.findDistance(next_player_state[0], next_m1_state[0]))
        state_list.append(self.findDistance(next_player_state[1], next_m1_state[1]))
        state_list.append(self.findDistance(next_player_state[0], next_m2_state[0]))
        state_list.append(self.findDistance(next_player_state[1], next_m2_state[1]))
        state_list.append(self.findDistance(next_player_state[0], next_m3_state[0]))
        state_list.append(self.findDistance(next_player_state[1], next_m3_state[1]))
        state_list.append(self.findDistance(next_player_state[0], next_m4_state[0]))
        state_list.append(self.findDistance(next_player_state[1], next_m4_state[1]))  

        return [state_list]

    def initialState(self):
        self.all_sprite= pygame.sprite.Group()  
        self.enemy_group= pygame.sprite.Group()
        self.player= Player()
        self.all_sprite.add(self.player)
        self.m1= Enemy()
        self.m2= Enemy()
        self.m3= Enemy()
        self.m4= Enemy()
        self.all_sprite.add(self.m1)
        self.all_sprite.add(self.m2)
        self.all_sprite.add(self.m3)
        self.all_sprite.add(self.m4)
        self.enemy_group.add(self.m1)
        self.enemy_group.add(self.m2)
        self.enemy_group.add(self.m3)
        self.enemy_group.add(self.m4)

        self.reward = 0
        self.total_reward= 0
        self.done = False

        state_list = []

        #get coordinates
        player_state= self.player.getCoordinates()
        m1_state= self.m1.getCoordinates()
        m2_state= self.m2.getCoordinates()
        m3_state= self.m3.getCoordinates()
        m4_state= self.m4.getCoordinates()

        #find distance
        state_list.append(self.findDistance(player_state[0], m1_state[0]))
        state_list.append(self.findDistance(player_state[1], m1_state[1]))
        state_list.append(self.findDistance(player_state[0], m2_state[0]))
        state_list.append(self.findDistance(player_state[1], m2_state[1]))
        state_list.append(self.findDistance(player_state[0], m3_state[0]))
        state_list.append(self.findDistance(player_state[1], m3_state[1]))
        state_list.append(self.findDistance(player_state[0], m4_state[0]))
        state_list.append(self.findDistance(player_state[1], m4_state[1]))  

        return [state_list]
    

    def run(self):
        state= self.initialState()
        batch_size = 16
        running= True
        while running:
            self.clock.tick(FPS)
            self.reward += 1    

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
#update
            action = self.agent.act(state)
            next_state = self.step(action)
            self.total_reward += self.reward

            hits = pygame.sprite.spritecollide(self.player, self.enemy_group, False, pygame.sprite.collide_circle)
            
            if hits:
                self.reward= -2000
                self.total_reward += self.reward
                #self.done=True #running
                running= False
                print("Total reward: ", self.total_reward)

            self.agent.remember(state, action, self.reward, next_state, self.done )

            state= next_state
            self.agent.replay(batch_size)
            self.agent.adaptiveEGreedy()

            self.screen.fill(BLUE)
            self.all_sprite.draw(self.screen)
            pygame.display.flip()  # GÃ¶ster 
    pygame.quit()

if __name__ == "__main__":
    env=Env()
    liste1=[]
    t= 0
    while True:
        t += 1
        print("Episode: ",t)
        liste1.append(env.total_reward)

        pygame.init() # Initialize pygame 
        screen = pygame.display.set_mode((WIDTH,HEIGHT)) #create window.
        pygame.display.set_caption('RL GAME !')
        clock = pygame.time.Clock()
        env.run()
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

Episode:  1
Episode:  2


KeyboardInterrupt: 