In [1]:
import random
import gym
import gym_game
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam


#from scores.score_logger import ScoreLogger

# Environment id handed to gym.make().
ENV_NAME = "game-v0"

# Q-learning update settings.
GAMMA = 0.95             # discount applied to the best next-state Q-value
LEARNING_RATE = 0.001    # learning rate given to the Adam optimizer

# Replay buffer settings.
MEMORY_SIZE = 10 ** 6    # maximum number of transitions kept in the deque
BATCH_SIZE = 20          # transitions sampled on every replay call

# Epsilon-greedy schedule: the exploration rate starts at EXPLORATION_MAX,
# is multiplied by EXPLORATION_DECAY after each replay and is clamped so it
# never drops below EXPLORATION_MIN.
EXPLORATION_MAX, EXPLORATION_MIN = 1.0, 0.01
EXPLORATION_DECAY = 0.995


class DQNSolver:
    """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

    Transitions are stored in a bounded deque and replayed in random
    mini-batches; the exploration rate shrinks after every replay.
    """

    def __init__(self, observation_space, action_space):
        """Create the replay buffer and compile the Q-network.

        observation_space -- length of the flat state vector fed to the network.
        action_space      -- number of discrete actions (size of the output layer).
        """
        self.exploration_rate = EXPLORATION_MAX
        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        # Two hidden ReLU layers of 24 units and one linear output per action.
        network = Sequential([
            Dense(24, input_shape=(observation_space,), activation="relu"),
            Dense(24, activation="relu"),
            Dense(self.action_space, activation="linear"),
        ])
        network.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
        self.model = network

    def remember(self, state, action, reward, next_state, done):
        """Append one (state, action, reward, next_state, done) tuple to the buffer."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def act(self, state):
        """Pick an action index for `state`.

        With probability `exploration_rate` a uniformly random index is
        returned; otherwise the index of the largest predicted Q-value.
        """
        exploring = np.random.rand() < self.exploration_rate
        if not exploring:
            predicted = self.model.predict(state)
            return np.argmax(predicted[0])
        return random.randrange(self.action_space)

    def experience_replay(self):
        """Fit the network on BATCH_SIZE random transitions, then decay exploration.

        Returns immediately, without touching the exploration rate, while the
        buffer holds fewer than BATCH_SIZE transitions.
        """
        enough_samples = len(self.memory) >= BATCH_SIZE
        if not enough_samples:
            return

        for observed, chosen, gained, following, finished in random.sample(self.memory, BATCH_SIZE):
            if finished:
                # Terminal transition: there is no future value to bootstrap from.
                target = gained
            else:
                best_future = np.amax(self.model.predict(following)[0])
                target = (gained + GAMMA * best_future)
            # Only the Q-value of the action actually taken is overwritten.
            estimates = self.model.predict(observed)
            estimates[0][chosen] = target
            self.model.fit(observed, estimates, verbose=0)

        decayed = self.exploration_rate * EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, decayed)


def Game(optimal_runs_per_maze=None):
    """Train a DQNSolver on the ENV_NAME environment until interrupted.

    Every run resets the environment and then repeats act -> step ->
    remember -> experience_replay until the environment reports a terminal
    step. At that point a summary line is printed and appended to
    "RunsData.txt". The outer loop has no exit condition, so the function
    only stops when the process is interrupted.

    optimal_runs_per_maze -- number of runs that finish in at most
        env.shortestRoute steps to accumulate before env.resetNewMaze() is
        called instead of env.reset(). The default None never switches maze.
        That matches the previous behaviour, where the counter was
        incremented by 0 and the switch branch could not be reached.

    Assumes the environment exposes observation_space as something int()
    accepts and action_space as a sized collection -- TODO confirm against
    the gym_game environment.
    """
    env = gym.make(ENV_NAME)
    observation_space = int(env.observation_space)
    action_space = len(env.action_space)
    dqn_solver = DQNSolver(observation_space, action_space)

    run = 0
    count = 0  # runs on the current maze that met the shortest-route bound
    while True:
        run += 1
        if optimal_runs_per_maze is None or count < optimal_runs_per_maze:
            state = env.reset()
        else:
            print(" ")
            state = env.resetNewMaze()
            count = 0

        # The network expects a batch dimension: shape (1, observation_space).
        state = np.reshape(state, [1, observation_space])
        step = 0
        totReward = 0
        while True:
            step += 1
            action = dqn_solver.act(state)
            state_next, reward, terminal, _ = env.step(action)
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            totReward += reward
            if terminal:
                print (env.maze.name,"Run: " + str(run) + ", No of Steps: " + str(step), "Total Reward:",totReward,env.shortestRoute, env.count/env.number)
                # Context manager guarantees the log file is closed even if
                # the write raises.
                with open("RunsData.txt", "a+") as file:
                    file.write(env.maze.name+" "+str(run)+" "+str(step)+" "+str(totReward)+" "+str(env.count/env.number)+"\n")
                if optimal_runs_per_maze is not None and step <= env.shortestRoute:
                    count += 1
                break
            # Learn from a random mini-batch after every non-terminal step.
            dqn_solver.experience_replay()


# Script entry point: start training when this code is executed directly.
# Game() runs an unconditional `while True` loop, so this call only ends when
# the process is interrupted.
if __name__ == "__main__":
    Game()
    

Using TensorFlow backend.
  result = entry_point.load(False)


Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Maze/Maze.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Maths/Cord.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Maze/MazeGenerator.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Agents/Worker.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Maths/Action.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Maths/State.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Agents/Prey.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Agents/Agent.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Main/Simulator.ipynb
Importing Jupyter notebook from /home/esha/anaconda3/Scripts/gym-game/Windows/MainWindow.ipynb
[0, 1, 2, 3, 4]
Name: Test
6x6
Start: (4, 5)
End: (1, 0) 
111121 5
110001 4
100011 3
101001 2
100011 1

KeyboardInterrupt: 

In [None]:
import numpy as np

# Start from a 3x1 integer column, then stack one more row underneath it.
a = np.array([4, 5, 6]).reshape(-1, 1)
print(a)
a = np.concatenate((a, [[7]]), axis=0)
print(a)

In [None]:
# Grow a float column by stacking rows under a one-row placeholder, then drop
# the placeholder (row 0), whose contents np.empty leaves uninitialised.
b = np.empty((1, 1))
b = np.concatenate((b, [[1]], [[2]]), axis=0)
b = np.delete(b, 0, axis=0)
print(b)

In [None]:
import itertools

from Maths.Action import Action

# One Action per integer value in range(len(Action)).
action_space = [Action(value) for value in range(len(Action))]
number = 2
# `number` references to the same action list, one per slot of a combination.
possible_actions = [action_space] * number
print(action_space)
# Cartesian product of the slots: every ordered combination of `number` actions.
combinations = list(itertools.product(*possible_actions))
self_action_space = np.asarray(combinations)
print(self_action_space[10][0])