In [1]:
from vizdoom import *
import tensorflow as tf 
import numpy as np
import matplotlib.pyplot as plt
from collections import deque

import random
import time

from skimage import transform
from skimage.color import rgb2gray

import warnings
warnings.filterwarnings('ignore')

game = DoomGame()
#PATH OF DEATHMATCH ViZDoom-master/scenarios/deathmatch.cfg
#game.load_config("vizdoom/scenarios/deathmatch.cfg")
game.load_config("YOUR PATH/ViZDoom-master/scenarios/basic.cfg")
game.set_screen_resolution(ScreenResolution.RES_1280X1024)
game.get_available_buttons()

buttons = game.get_available_buttons()
print(buttons)
actions = []
actions_dict = {}
for i in range(buttons.index(Button.ATTACK)+1):
    act_name = str(buttons[i])[7:]
    act = []
    for k in range(buttons.index(Button.ATTACK)+1):
        act.append(0)
    act[i] = 1
    actions.append(act)
    actions_dict[str(act_name)+'-'+str(i+1)]=act

[Button.MOVE_LEFT, Button.MOVE_RIGHT, Button.ATTACK]


In [13]:
class Agent:
    def __init__(self, memory_size=100000):
        
        self.gamma = 0.95
        
        self.explore_start = 1.0
        self.explore_stop = 0.01
        self.decay_rate = 0.0001
    
        self.state = self.get_state()
        self.actions = self.get_actions()
        self.impressions = 0
        self.stack_size = 3
        
        #memory
        #by default - 1000 cell 
        #will be 4 cells in each cell -> experience tuple()
        self.memory = deque(maxlen = memory_size)
        
        self.frame = self.make_frame()
        
        #models: for Q and target Q
        self.model = self.build_model()
        self.target_model = self.build_model()
           
    def get_state(self, state=np.zeros((3, 1024, 1280), dtype='uint8')):
        state = state
        return state
                          #attack
    def get_actions(self, actions=[1, 0, 0]):
        actions = actions
        return actions
        
    def act(self, state, actions=actions):
        if np.random.rand() <= self.explore_start:
            action = random.choice(actions)
            return action
        values_of_actions = self.model.predict(state)
        action = actions[np.argmax(values_of_actions)]
        return action
    
    def make_frame(self, state=np.zeros((3, 1024, 1280), dtype='uint8')):
        #вычитание фреймов друг из друга передает динамику (velocity)
        dymanic_frame = np.array(state[0]) - np.array(state[1])
        gray_frame = rgb2gray(dymanic_frame)
        cropped_frame = dymanic_frame[150:, :]
        resized_frame = transform.resize(cropped_frame, [500, 500])
        frame = np.reshape(resized_frame, (1, 500, 500, 1))
        return frame
    
    def get_impressions(self):
        #64 refers to maximum backet
        if(self.impressions) < 64:
            impressions = self.impressions + 1
            return impressions
        return self.impressions
        
    def experience(self, frame, action, reward, next_frame, done):
        experience = (frame, action, reward, next_frame, done)
        self.memory.append(experience)
        
    def build_model(self):
        #delete non-actual models in tensorflowboard (because every new object of Agent() gets NEW model)
        tf.keras.backend.clear_session()
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Conv2D(input_shape=(500, 500, 1),
                                         data_format='channels_last',
                                         filters=32, 
                                         kernel_size=[60, 60],
                                         strides = (4, 4),
                                         padding = 'valid',
                                         activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(epsilon=0.001))
        model.add(tf.keras.layers.Conv2D(filters=64, 
                                         kernel_size=[40, 40],
                                         strides = (4, 4),
                                         padding = 'valid',
                                         activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(epsilon=0.001))
        model.add(tf.keras.layers.Conv2D(filters=128, 
                                         kernel_size=[10, 10],
                                         strides = (4, 4),
                                         padding = 'valid',
                                         activation='relu'))
        model.add(tf.keras.layers.BatchNormalization(epsilon=0.001))
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(700))
        model.add(tf.keras.layers.Dense(len(actions), activation=None))
        model.compile(optimizer='adam', loss='mse',
                      metrics=['accuracy'])
        return model
    
    def update_target_model(self):
        # copy weights from model to target_model
        self.target_model.set_weights(self.model.get_weights())
        print('weights of target net updated')
    
    #возможно, неверно индексирован action в target_f[0][action]
    def replay(self, batch_size=1):
        batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                    Q_next = self.target_model.predict(next_state)[0]
                    target = (reward + self.gamma *Q_next[np.argmax(Q_next)])
            target_f = agent.target_model.predict(state)
            print(target_f)
            choice = np.argmax(target_f[0])
            target_f[0][choice] = target
            print(target_f[0])
            #model train
            self.model.fit(state, target_f, epochs=1, verbose=0)
            print(self.model.evaluate(state, target_f, verbose=0)[0])
        print('End of learning')
            
    def save_network(self, path='/Users/s.matrosov/Downloads/Doom_Model'):
        # Saves model at specified path as h5 file
        self.model.save(path)
        print("Successfully saved network.")

    def load_network(self, path='/Users/s.matrosov/Downloads/Doom_Model/'):
        self.model = tf.keras.models.load_model(path)
        print("Succesfully loaded network.")
            
    def increase_explotation(self, decay_step = 1):
        explore_start = self.explore_stop + (self.explore_start - self.explore_stop) * np.exp(-self.decay_rate * decay_step)
        return explore_start

In [14]:
path = 'YOUR PATH'

In [15]:
agent = Agent()
#avaible_actions
agent.actions = agent.get_actions(actions)

#build new model
#agent.build_model()

#load existing model
agent.load_network(path)

Succesfully loaded network.


In [17]:
episodes = 1500
max_steps = 100 

game.init()
total_episode_rewards = []
timestamps = []
timestamp = 1

for i in range(episodes):
    print('Episode ' + str(i))
    game.new_episode()
    finished = game.is_episode_finished()
    if finished:
        print('Finished? - ' + str(finished))
        continue
    episode_rewards = []
    step = 0
    while step < max_steps:
        #стратовый фрэйм
        if game.is_new_episode():
            #получайем стек фреймов
            state = game.get_state().screen_buffer
            test_state = state
            #агент все это превращает в фрейм вида: фрейм1 минус фрейм2 минус фрейм3
            agent.frame = agent.make_frame(state)
            #теперь состояние - это и есть этот фрейм
            state = agent.frame
            
            agent.impressions = agent.get_impressions()
            
            action = agent.act(state)
            
            reward = game.make_action(action)
            episode_rewards.append(reward)
            
            next_state = game.get_state().screen_buffer
            agent.frame = agent.make_frame(next_state)
            next_state = agent.frame
            
            done = game.is_episode_finished()
            
            agent.experience(state, action, reward, next_state, done)
            step = step + 1
            time.sleep(0.02)
            
        #все остальные фреймы и до конца
        else:
            state = game.get_state().screen_buffer
            agent.frame = agent.make_frame(state)
            state = agent.frame
            
            #для бакета
            agent.impressions = agent.get_impressions()
            
            action = agent.act(state)
            
            reward = game.make_action(action)
            episode_rewards.append(reward)
            
            done = game.is_episode_finished()
            
            if done:
                step = max_steps
                #black screen
                next_state = np.zeros((3, 1024, 1280), dtype='uint8')
                agent.frame = agent.make_frame(next_state)
                next_state = agent.frame
                
                agent.experience(state, action, reward, next_state, done)    
                
                agent.explore_start = agent.increase_explotation()
                
                time.sleep(0.02)
                break
            
            else:
                next_state = game.get_state().screen_buffer
                agent.frame = agent.make_frame(next_state)
                next_state = agent.frame
                
                agent.experience(state, action, reward, next_state, done)
                agent.explore_start = agent.increase_explotation()
                
                step = step + 1
                time.sleep(0.02) 
    #training           
    agent.replay(agent.impressions)
    
    #update q targets weights
    if i+1 % 15 == 0:
        agent.update_target_model()
    
    #save model every 5 episodes
    if i+1 % 5 == 0:
        agent.save_network(path)
         
    #for plot
    total_episode_rewards.append(game.get_total_reward())
    timestamps.append(timestamp)
    timestamp = timestamp + 1
    
    print ("Result:", game.get_total_reward())
    time.sleep(4)
game.close()
agent.save_network(path)
agent.update_target_model()

Episode 0
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64
backet
64


0.27431800961494446
[[ 0.8339296  -0.29643723 -0.7714993 ]]
[-0.21774903 -0.29643723 -0.7714993 ]
0.26800405979156494
[[0.99266505 2.030627   7.039484  ]]
[0.99266505 2.030627   0.29143104]
8.559165000915527
[[ 1.4681038  0.5440096 -1.1385092]]
[ 0.4163906  0.5440096 -1.1385092]
0.06563318520784378
[[1.3331158 1.0125003 0.6402706]]
[-0.00996099  1.0125003   0.6402706 ]
0.09088406711816788
[[1.0618272 1.2322917 5.690296 ]]
[1.0618272 1.2322917 4.304459 ]
3.8766021728515625
[[ 1.4223919   0.49881557 -1.2196846 ]]
[ 0.31221017  0.49881557 -1.2196846 ]
0.43740490078926086
[[ 1.3608941  0.99499   -0.6917654]]
[ 0.32472858  0.99499    -0.6917654 ]
0.67710280418396
[[0.9347283 1.7984915 6.13238  ]]
[0.9347283 1.7984915 5.40036  ]
2.0551998615264893
[[ 1.4974154  1.0850705 -0.6056042]]
[ 0.42508233  1.0850705  -0.6056042 ]
0.2552556097507477
[[1.181949  1.2562882 5.8554444]]
[1.181949  1.2562882 4.518383 ]
2.0775392055511475
[[ 1.3582684  0.9520558 -0.6980307]]
[-0.17715068  0.9520558  -0.6980

2.4147722721099854
[[1.2981038 1.1027915 0.6047286]]
[-0.16948187  1.1027915   0.6047286 ]
0.268701434135437
[[ 0.8645692 -0.4965776 -0.8386568]]
[-0.20020854 -0.4965776  -0.8386568 ]
0.3451010286808014
[[0.81576693 1.677578   6.2639017 ]]
[0.81576693 1.677578   4.9235663 ]
1.5930185317993164
[[ 1.3731339   0.4376792  -0.39449963]]
[ 0.34548268  0.4376792  -0.39449963]
0.06928472965955734
[[ 1.3367656  0.2946334 -0.7739968]]
[-0.08888365  0.2946334  -0.7739968 ]
0.18404513597488403
[[0.91308045 1.7109455  5.9861693 ]]
[0.91308045 1.7109455  4.7612658 ]
0.6422863006591797
[[0.8815598 1.3856616 5.710307 ]]
[0.8815598 1.3856616 4.4418464]
0.7903237342834473
[[1.181949  1.2562882 5.8554444]]
[1.181949  1.2562882 4.518383 ]
1.1496533155441284
[[1.2602581  0.95585454 0.3492929 ]]
[0.19865263 0.95585454 0.3492929 ]
0.1235455572605133
[[ 1.4084023   0.4146708  -0.49139646]]
[ 0.33507997  0.4146708  -0.49139646]
0.620902955532074
[[ 0.8522709  -0.49196112 -0.7842437 ]]
[-0.17865928 -0.49196112 

Episode 857
Finished? - True
Episode 858
Finished? - True
Episode 859
Finished? - True
Episode 860
Finished? - True
Episode 861
Finished? - True
Episode 862
Finished? - True
Episode 863
Finished? - True
Episode 864
Finished? - True
Episode 865
Finished? - True
Episode 866
Finished? - True
Episode 867
Finished? - True
Episode 868
Finished? - True
Episode 869
Finished? - True
Episode 870
Finished? - True
Episode 871
Finished? - True
Episode 872
Finished? - True
Episode 873
Finished? - True
Episode 874
Finished? - True
Episode 875
Finished? - True
Episode 876
Finished? - True
Episode 877
Finished? - True
Episode 878
Finished? - True
Episode 879
Finished? - True
Episode 880
Finished? - True
Episode 881
Finished? - True
Episode 882
Finished? - True
Episode 883
Finished? - True
Episode 884
Finished? - True
Episode 885
Finished? - True
Episode 886
Finished? - True
Episode 887
Finished? - True
Episode 888
Finished? - True
Episode 889
Finished? - True
Episode 890
Finished? - True
Episode 891
Fi

INFO:tensorflow:Assets written to: /Users/s.matrosov/Downloads/Doom_Model/3/assets
Successfully saved network.
weights of target net updated


In [7]:
print(agent.explore_start)

0.9611815447607995


In [18]:
agent.save_network(path)
agent.update_target_model()

INFO:tensorflow:Assets written to: /Users/s.matrosov/Downloads/Doom_Model/3/assets
Successfully saved network.
weights of target net updated


In [None]:
plt.plot(timestamps, total_episode_rewards)

In [None]:
#agent will use model to act
agent.explore_start = 0.01

#games and steps in 1 episode
episodes = 1500
max_steps = 100 

game.init()
#game_rewards = []
#game_timestamps = []
#timestamp = 1

for i in range(episodes):
    print('Episode ' + str(i))
    game.new_episode()
    episode_rewards = []
    step = 0
    while step < max_steps:
        if game.is_new_episode():
            state = game.get_state().screen_buffer
            test_state = state
            agent.frame = agent.make_frame(state)
            state = agent.frame
            
            action = agent.act(state)
            
            reward = game.make_action(action)
            episode_rewards.append(reward)
            done = game.is_episode_finished()
            
            step = step + 1
            time.sleep(0.02)
        #все остальные фреймы и до конца
        else:
            state = game.get_state().screen_buffer
            agent.frame = agent.make_frame(state)
            state = agent.frame
            
            action = agent.act(state)
            
            reward = game.make_action(action)
            episode_rewards.append(reward)
            
            done = game.is_episode_finished()
            
            if done:
                step = max_steps
                total_reward = np.sum(episode_rewards)
                time.sleep(0.02)
                break
            else:
                step = step + 1
                if step == 199:
                    total_reward = np.sum(episode_rewards)
                time.sleep(0.02)        
    print ("Result:", game.get_total_reward())
    time.sleep(2)
game.close()