In [1]:
import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
from collections import deque

import tensorflow as tf
import tensorflow.keras.backend as backend
from tensorflow.python.keras.backend import set_session
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard

from PIL import Image

from threading import Thread


from tqdm import tqdm

In [2]:
TRAIN_SET_SIZE = 50_000
MIN_TRAIN_SET_SIZE = 200
MINIBATCH_SIZE = 16
TRAIN_BATCH_SIZE = 2
SAVE_EVERY = 500
MODEL_NAME = '256x2'
MEMORY_FRACTION = 0.8

# image settings
IMG_HEIGHT = 100
IMG_WIDTH = 100
IMG_CHANNELS = 3

# environment settings
NUM_EPISODES = 20_000
# NUM_ACTIONS = 10 # MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN, MOUSE_LEFT, MOUSE_RIGHT, MOUSE_UP, MOUSE_DOWN, LCLICK, RCLICK
NUM_ACTIONS = 2 # MOVE_UP, LCLICK
DISCOUNT = 0.99
EPISODE_TIME = 3
AGGREGATE_STATS_EVERY = 500
MIN_REWARD = -100

# exploration settings
epsilon = 1
EPSILON_DECAY = 0.9975
MIN_EPSILON = 0.001

In [3]:
class ModifiedTensorBoard(TensorBoard):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.FileWriter(self.log_dir)
        
    def set_model(self, model):
        pass

    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    def on_batch_end(self, batch, logs=None):
        pass

    def on_train_end(self, _):
        pass

    def update_stats(self, **stats):
        self._write_logs(stats, self.step)

In [None]:
class MraftAgent:
    def __init__(self):
        self.sess = tf.Session()
        set_session(self.sess)
        
        self.graph = tf.get_default_graph()
        
        self.model = self.create_model()
        
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())
        self.target_update_counter = 0
        
        self.train_set = deque(maxlen=TRAIN_SET_SIZE)
        
        self.tensorboard = ModifiedTensorBoard(log_dir=f'logs/miraft-model-{int(time.time())}')
        self.last_logged_step = 0
        self.cur_step = 0
        
        self.training_initialized = False
        
        self.terminate = False
        
    def create_model(self):
        model = Sequential([
            Conv2D(256, (3, 3), input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)),
            Activation('relu'),
            MaxPooling2D(2, 2),
            Dropout(0.2),
            
            Conv2D(256, (3, 3)),
            Activation('relu'),
            MaxPooling2D(2, 2),
            Dropout(0.2),
            
            Flatten(),
            Dense(64),
            Dense(NUM_ACTIONS, activation='linear')
        ])
        
        model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=['accuracy'])
        
        return model
    
    def get_q_values(self, state):
        with self.graph.as_default():
            return self.model.predict(np.array(state).reshape(-1, *state.shape)/ 255)[0]
    
    def train(self):
        if len(self.train_set) < MIN_TRAIN_SET_SIZE:
            return
        print('Training...')
        minibatch = random.sample(self.train_set, MINIBATCH_SIZE)
        
        current_states = np.array([transition[0] for transition in minibatch]) / 255.
            
        with self.graph.as_default():
            current_q_values = self.model.predict(current_states)
        
        future_states = np.array([transition[3] for transition in minibatch]) / 255
        with self.graph.as_default():
            future_q_values = self.target_model.predict(future_states)
        
        for i, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            if not done:
                new_q = DISCOUNT * reward * np.max(future_q_values[i])
            else:
                new_q = reward
                
            current_q_list = current_q_values[i]
            current_q_list[action] = new_q
            
        X = current_states
        y = np.array(current_q_values)
        
        log_step = False
        if self.tensorboard.step > self.last_logged_step:
            log_step = True
            self.last_logged_step = self.tensorboard.step
            
        with self.graph.as_default():
            self.model.fit(
                X,
                y,
                batch_size=TRAIN_BATCH_SIZE,
                verbose=0,
                shuffle=False,
                callbacks=[self.tensorboard] if False else None
            )
        
        if log_step:
            self.target_update_counter += 1
        
        if self.target_update_counter > SAVE_EVERY:
            self.target_model.set_weights(self.model.get_weights)
            
    def train_in_loop(self):
         # iterate through once to setup..
        X = np.random.uniform(size=(1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)).astype(np.float32)
        y = np.random.uniform(size=(1, NUM_ACTIONS)).astype(np.float32)
        with self.graph.as_default(): # apparently useless statement but good practice to prevent overlapping graph values
            set_session(self.sess)
            self.model.fit(X, y, verbose=False, batch_size=1)
        
        self.training_initialized = True
        
        while True:
            if self.terminate:
                break
            self.train()
            time.sleep(0.01)
            

In [None]:
class MraftEnv:
    
    def __init__(self):
        self.episode_start = 0

    def reset(self):
        img = self.next_frame()
        return img
    
    def step(self, action):
        if action == 0: # move foreward
            pass
            # press and hold w for 1 second
            # delay by however much to keep things synchronous
        elif action == 1: # left click
            pass
            # press and hold for 1 second
            # delay by however much to keep things synchronous
        
        frame = self.next_frame()
        destroyed, picked_up = self.process(frame)
        
        if destroyed and picked_up:
            done = True
            reward = 100
        elif destroyed:
            done = False
            reward = -1
        else:
            done = False
            reward = -10
        
        if time.time() - episode_start > EPISODE_TIME:
            done = True

        return frame, reward, done
    
    def next_frame(self):
        img = cv2.imread('images/img.png')
        img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH))
        return img
    
    def process(self, frame):
        return random.randint(0, 1) == 1, random.randint(0, 1) == 1

In [None]:
random.seed(1)
np.random.seed(1)
tf.set_random_seed(1)

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
backend.set_session(tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)))

if not os.path.isdir('models'):
    os.makedirs('models')

In [None]:
env = MraftEnv()
agent = MraftAgent()

train_thread = Thread(target=agent.train_in_loop, daemon=True)
train_thread.start()

ep_rewards = []

while not agent.training_initialized:
    time.sleep(0.01)

for ep in range(NUM_EPISODES):
    print(f'Episode {ep}')
    episode_start = time.time()
    
    agent.tensorboard.step = ep
    
    episode_reward = 0
    
    current_state = env.reset()
    
    done = False
    while True:
        if random.uniform(0, 1) > epsilon:
            action = np.argmax(agent.get_q_values(current_state))
        else:
            action = random.randint(0, 1)
        
        next_state, reward, done = env.step(action)
        
        episode_reward += reward
        
        agent.train_set.append((current_state, action, reward, next_state, done))
        
        if done:
            break
            
    ep_rewards.append(episode_reward)
    
    # visualization
    if ep % AGGREGATE_STATS_EVERY == 0 or ep == 1:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
#         agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward)

    if min_reward >= MIN_REWARD:
        agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg__{min_reward:_>7.2f}min.h5')

    
    if epsilon > MIN_EPSILON:
        epsilon = max(MIN_EPSILON, EPSILON_DECAY * epsilon)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Episode 0
Episode 1
Episode 2
Episode 3
Episode 4
Episode 5
Episode 6
Episode 7
Episode 8
Episode 9
Episode 10
Episode 11
Episode 12
Episode 13
Episode 14
Episode 15
Episode 16
Episode 17
Episode 18
Episode 19
Episode 20
Episode 21
Episode 22
Episode 23
Episode 24
Episode 25
Episode 26
Episode 27
Episode 28
Episode 29
Episode 30
Episode 31
Episode 32
Episode 33
Episode 34
Episode 35
Episode 36
Episode 37
Episode 38
Episode 39
Episode 40
Episode 41
Episode 42
Episode 43
Episode 44
Episode 45
Episode 46
Episode 47
Training...
Episode 48
Episode 49
Episode 50
Training...
Episode 51
Episode 52
Training...
Episode 53
Episode 54
Training...
Episode 55
Episode 56
Training...
Episode 57
Episode 58
Training...Episode

Training...
Episode 390
Training...Episode 391

Episode 392
Training...
Episode 393
Episode 394
Episode 395
Training...
Episode 396
Episode 397
Training...
Episode 398
Training...
Episode 399
Training...
Episode 400
Episode 401
Training...
Episode 402
Episode 403
Training...
Episode 404
Episode 405
Training...
Episode 406
Training...
Episode 407
Episode 408
Training...
Episode 409
Training...Episode 410

Episode 411
Training...
Episode 412
Training...
Episode 413
Episode 414
Training...
Episode 415
Episode 416
Training...
Episode 417
Episode 418
Training...
Episode 419
Episode 420
Training...
Episode 421
Training...Episode 422

Episode 423
Training...
Episode 424
Episode 425
Training...
Episode 426
Episode 427
Training...
Episode 428
Training...
Episode 429
Episode 430
Training...
Episode 431
Training...
Episode 432
Episode 433
Training...
Episode 434
Episode 435
Training...
Episode 436
Episode 437
Training...
Episode 438
Episode 439
Training...
Episode 440
Training...
Episode 441
Epis

Training...Episode 801

Episode 802
Training...
Episode 803
Training...
Episode 804
Episode 805
Training...
Episode 806
Episode 807
Training...
Episode 808
Episode 809
Training...
Episode 810
Training...
Episode 811
Episode 812
Training...
Episode 813
Training...
Episode 814
Training...
Episode 815
Training...
Episode 816
Episode 817
Training...
Episode 818
Training...
Episode 819
Training...
Episode 820
Episode 821
Training...
Episode 822
Episode 823
Training...
Training...
Episode 824
Training...Episode 825

Episode 826
Training...
Episode 827
Episode 828
Training...
Episode 829
Training...
Episode 830
Episode 831
Training...
Episode 832
Episode 833
Training...
Episode 834
Episode 835
Training...
Episode 836
Training...
Episode 837
Training...
Episode 838
Training...
Episode 839
Training...
Episode 840
Training...
Episode 841
Episode 842
Training...
Episode 843
Training...
Training...
Episode 844
Training...Episode 845

Episode 846
Training...
Episode 847
Training...
Episode 848
Epis

Exception in thread Thread-7:
Traceback (most recent call last):
  File "C:\Users\GZhang\AppData\Local\Programs\Python\Python36\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "C:\Users\GZhang\AppData\Local\Programs\Python\Python36\lib\threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-4-0ef6163df471>", line 110, in train_in_loop
    self.train()
  File "<ipython-input-4-0ef6163df471>", line 95, in train
    self.target_model.set_weights(self.model.get_weights)
  File "C:\Users\GZhang\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\network.py", line 404, in set_weights
    layer_weights = weights[:num_param]
TypeError: 'method' object is not subscriptable



Episode 874
Episode 875
Episode 876
Episode 877
Episode 878
Episode 879
Episode 880
Episode 881
Episode 882
Episode 883
Episode 884
Episode 885
Episode 886
Episode 887
Episode 888
Episode 889
Episode 890
Episode 891
Episode 892
Episode 893
Episode 894
Episode 895
Episode 896
Episode 897
Episode 898
Episode 899
Episode 900
Episode 901
Episode 902
Episode 903
Episode 904
Episode 905
Episode 906
Episode 907
Episode 908
Episode 909
